/*
 *	recorder8.c 
 *
 *	model a recorder
 *	12-21-11
 *
 *	usage: recorder4
 *		in this case results are written to a subdirectory named "results"
 *	or
 *	usage: recorder4 dir
 *		in this case results are written to a subdirectory named "dir"
 *	or
 *	usage: recorder4 -h
 *		to get help with command line args
 *
 *	usage: recorder4 -restart dir
 *		reads map data and time from files and continues a previous calc 
 *		the files are t_restart", rho_map_restart, u_map_restart, and v_map_restart
 *		 
 *	usage: recorder4 test
 *		generates the geometry file and then exits
 *
 *	NOTE: see below (after recorder7a) for major changes in the command line
 *
 *	2D simulation
 *	use explicit MacCormack method
 *
 *	v3 uses nonslip boundary conditions for the recorder surfaces
 *	and absorbing boundaries at the walls of the region
 *
 *	this version is prepared for amdahl 01-03-12
 *		- found some mistakes in the version running the past
 *		few days on my laptop, but those results are still ok
 *
 *	v4 adds second order artifical viscosity as described by Kuruvila and Anderson
 *	
 *	v5 adds fourth order viscosity patterned roughly after Jameson
 *
 *	v6 makes the second and fourth order viscosity follow Jameson more closely (with
 *		a switch of the 4th order term), but not exactly
 *
 *	v6b follow the Jameson-Turkel relations exactly, except near boundaries where
 *		there is only 2nd order dissipation
 * 
 *	v6c add the ability to turn ramp blowing speed to zero at end of a tone
 *
 *	v6d change the boundary conditions at the outer edges -- use an impedance
 *		Z = Z_1 + i omega Z_2
 *
 *	v6e same as v6d but change the way I numerically implement that impedance 
 *		condition at the edges of the boundary
 *
 *	v6f add the ability to filter out high frequencies by periodically time averaging the spatial
 *		maps of rho, u, and v
 *		the averaging will be specified by 3 parameters:
 *			t_accum = time period to accumulate data for averaging 
 *			t_accum_interval = how often to accum for filtering (t_accum_interval > t_accum)
 *			t_accum_start = when to start accumulating 
 *
 *	v6g add a flag so that can toggle boundary condition on simulation region between having
 *		an impedance Z or having rho = u = v = 0 (as in v6c and previous)
 *		make a few small tweaks to filtering routines on 3-11-12
 *
 *	v7a -- still 2D but now allow for a nonuniform cartesian grid
 *		The initialization files now change a bit
 *		run_data.dat is unchanged - it contains all the non-geometry information
 *		The geometry files are generated by a pre-processing programm geometry_setup.c
 *		The grid information is stored in files x_grid.dat and y_grid.dat
 *			The format for these files is
 *			i1	x1	dx1
 *			i2	x2	dx2
 *			etc. for the rest of the x file and for y
 *			where i1 is the grid index, dx1 is the distance between grid points i1 and i2
 *			and x1 is the absolute location of grid index point i1
 *
 *		The spatial grid/geometry is stored in three files:
 *			geometry_solid.dat contains all the points on the boundary and in and on the recorder
 *			geometry_initialize.dat contains points where u is imposed (initialized) in the channel
 *			geometry_record.dat contains the three points where the sound is recorded
 *			The format for all of these files is in terms of grid indices
 *				i1	j1
 *				i2	j2
 *				etc.
 *			The actual spatial location of these points can then be found from the grid info
 *			in x_grid.dat and y_grid.dat
 *
 *		The initialization info for u in the channel is stored in u_init.dat
 *			The format is just one line with the values of u starting at the lower edge of the
 *			channel and proceeding to the upper edge
 *
 *		The files x_grid.dat, y_grid.dat, geometry_solid.dat, geometry_initialize.dat, geometry_sound.dat
 *			and u_init.dat are all found in a directory (<input_directory>) named on the
 *			argument line of recorder7a.
 *
 *		The other information needed is run_data.dat which is taken from the current directory
 *			Files needed for a restart (t_restart, rho_map_restart, u_map_restart, and v_map_restart)
 *			are also read from the current directory
 *
 *		All output files are written to the <output_directory>
 *
 *		NEW USAGE:
 *			recorder7a
 *			or
 *			recorder7a input_directory output_directory
 *			or
 *			recorder7a -restart input_directory output_directory
 *
 *			with no command line arguments gives just the usage information
 *
 *			input_directory = directory from which all the input files are read
 *			including the geometry, the blowing conditions, and restart information (if needed)
 *
 *			-restart indicates that this is a continuation of a previous run
 *
 *			output_directory = directory into which all the output files are written
 *
 *	recorder8:	start work on three dimensional version		
 *			this test version is still 2D, but I am cutting back on the number of arrays
 *			and their sizes - once this is tested I will move on to 3D
 *
 *			note a major change to the way the map files for rho, u, v, and w are saved
 *			all of the info is now in one file rather than in separate files
 *
 *			there are other big changes necessitated by going to 3 dimensions
 *
 *		usage:
 *			recorder8 input_directory master_output_directory runxxx
 *			or
 *			recorder8 -restart input_directory master_output_directory runxxx
 *
 *			input_directory holds input grid data and run data
 *			master_output_directory is the parent directory that holds the output from
 *				each run -- each run is in directory runxxx where xxx is the run number
 *			
 *			for a restart the starting time is read from the file t_restart (in current dir)
 *			and the starting map data is read from a SINGLE file restart_maps.dat
 *			that is in the $RCAC_SCRATCH directory 
 *
 *			note map data for restarts and map data for observing spatial variation of
 *			rho, u, v, and w are now saved in the scratch directory and data for a 
 *			given time is stored in a single file (not in separate files as in the past)
 *
 *	recorder8.2:	add capability to incorporate a surface normal vector at the surface of the
 *			instrument that is not simply perpendicular - this is hoped to be good for
 *			dealing with corners and edges to allow for a flaring horn, etc.
 *
 *			surface normal data is in the input directory with the name surface_point_info.dat
 *			and is stored in a table with the form 
 *			i	j	k	i_hat	j_hat	k_hat
 *
 *			if this file is not found in the input directory then the old way of dealing with
 *			the instrument surface is used
 *			 
 */

#include "recorder8.h"	/* general defines	*/

double blowing_velocity();
double artificial_viscosity();
double artificial_viscosity_on_recorder();
double artificial_viscosity_on_boundary();
double bigger();
double second_deriv_x();
double second_deriv_y();
double second_deriv_z();
double reset_filtering_data();

float ***allocate_array_of_floats();
int ***allocate_array_of_integers();

float ***rho,***rho_predictor;
float ***rho_ave,***u_ave,***v_ave,***w_ave;
float ***u,***v,***w;
float ***u_predictor,***v_predictor,***w_predictor;
float ***rho_buf,***u_buf,***v_buf,***w_buf;

int i_min, i_max;	/* these are the boundaries of rectangular simulation region	*/
int j_min, j_max;	/* i_min = j_min = k_min = 0 and the other boundaries are at	*/
int k_min, k_max;	/* i_max, j_max and k_max					*/

int i_init_min,i_init_max;	/* range of indices for the initialization region		*/
int j_init_min,j_init_max;
int k_init_min,k_init_max;
double init_val_j[N_Y_MAX]; /* used for initialization - typically have poiselle-like flow  */
double init_val_k[N_Z_MAX]; /* used for initialization - typically have poiselle-like flow  */
			/* in a channel	*/

double t,dt;		/* t = time and dt = time step */
double dx_min;		/* smallest grid spacing along x or y	*/
double nu;	/* kinematic viscosity		*/
double c_s;	/* speed of sound in air	*/
double rho_air;	/* density of air		*/
double courant; /* factor relating dt, dx_min, and c_s	*/
/* most of these values (nu, c_s, rho_air, etc.) are set in run_data.dat	*/

double t_start,t_end;	/* t_start = 0 = time that simulation starts (unless this run */
			/* is a restart), t_end = end	*/
double t_stop_blowing,t_ramp_down;	/* time to start ramping down the blowing	*/
			/* speed and time to take to ramp blowing speed to zero		*/
double t_record_map;	/* how often to record full results for rho, u, and v as functions */
			/* of position - for later analysis				*/
double t_map_ave;	/* average map data over this interval before saving	*/
double t_map_start;	/* when to start taking map data			*/
double t_map_end;	/* when to stop taking map data			*/
double t_record_sound;  /* how often to record rho vs. t at two listening points	*/

double u_init,v_init;	/* initialization values of u and v in specified region		*/
			/* in v3 -- v7 this region is rectangular and v_init is assumed to be 0 */

// will be accessed by geometry[i][j][k];	
int ***geometry;		/* describes the domain of the calc */
			/* the geometry (including the shape of the recorder)	*/
			/* is specified in the files geometry_solid.dat, geometry_initialization.dat*/
 			/* and geometry_sound.dat		*/
			/* in geometry[][][] OPEN = open space away from walls	*/
			/* SOLID = body of recorder		*/
			/* for other values see recorder8.h	*/

FILE *fp_t;	/* use this file to store the time at which maps 
			density and velocity vs position data are strored */

int i_record[22],j_record[22];	/* record rho vs. time at	*/
int k_record[22];	/* several different locations (currently 6)  specified in geometry_sound.dat	*/
int n_record_sound;

//double rho_t_1_sum,rho_t_2_sum; /* use to average rho at the locations for intervals */
double rho_t_sum[22];		/* t_record_sound					*/
double t_sum;			/* for averaging the time to go along with rho_t_*_sum	*/

char master_output_directory[300];
char results_directory[300];	/* directory in which all results are written - typically named runxxx 
					where xxx is a number		*/
int n_boundary;			/* number of boundary points on the recorder */
int x_boundary[N_MAX_3];	/* all points on the boundary of the recorder are stored */
int y_boundary[N_MAX_3];	/* in these three arrays (the i,j,k indices)		*/
int z_boundary[N_MAX_3];	/* in these three arrays (the i,j,k indices)		*/
int boundary_flag[N_MAX_3];	/* this flag specified what kind of boundary it is	*/
				/* this determines how it is dealt with -- to achieve	*/
				/* non-slip boundary conditions with acoustic impedance Z */
double Z_rec_1,Z_rec_2;		/* acoustic impedance of recorder surface rho = (Z_rec_1 + i omega Z_rec_2)*v_n/c^2*/
double Z_rec_1_norm;		/* define Z_rec_norm = Z / c^2	for convenience		*/
double Z_rec_2_norm;
double Z_rec_2_prime;		/* Z_2 normalized differently			*/
double Z_rec_1_2_plus;

double Z_bound_1,Z_bound_2;	/* acoustic impedance of the outer boundaries	*/
double Z_bound_1_norm,Z_bound_2_norm;	/* same as Z_rec_norm above		*/
double Z_bound_2_prime;		/* Z_2 normalized differently			*/
double Z_bound_1_2_plus;

int restart_flag;	/* = START_FRESH for a new calc, RE_START to continue a previous calc	*/
double t_restart_save_interval;	/* save things this often for poossible restart */
char restart_maps[300];	/* file names for restart info			*/

FILE *fp_log;	/* use to log info from each run	*/

double viscosity_1_0,viscosity_2_0;/* parameters determining the magnitude of	*/
				/* the artificial viscosity			*/
double t_viscosity_start;	/* time to turn on the artificial viscosity	*/

double t_blow_transient;	/* time to ramp up channel velocity		*/

int n_viscosity_monitor;	/* used for debugging				*/

double t_accum;			/* time period to accum map data to damp noise issues at high frequencies */
double t_accum_interval;	/* how often to average (t_average_interval > t_map_average)		*/
double t_accum_start;		/* when to start this averaging						*/

int boundary_Z_flag;		/* = ON means use the above values of Z for boundaries of simulation region */
				/* = OFF means set rho = u = v = 0 on boundaries	*/

double dx_grid[N_X_MAX],dy_grid[N_Y_MAX],dz_grid[N_Z_MAX];	/* grid spacings along x and y -- 
						note that dx[i] is the spacing between points i and i+1	*/
double x_grid[N_X_MAX],y_grid[N_Y_MAX],z_grid[N_Z_MAX];	/* locations in real space of grid points i,j,k	*/

char input_directory[400];
char scratch_directory[400];
char results_directory_full_path[400];

//	variables holding surface normal info
// i,j,k coordinates of all surface points
int i_surface[N_MAX_3],j_surface[N_MAX_3],k_surface[N_MAX_3]; 
double x_hat[N_MAX_3],y_hat[N_MAX_3],z_hat[N_MAX_3]; // unit vector normal to surface
int n_surface_points;	// number of surface points
int surface_normal_flag;	// = YES if this data is available

main(argc,argv)
int argc;
char *argv[];
{
	int n_threads;
	char tmp[300],mes[300];

	restart_flag = START_FRESH;	/* default is a fresh start	*/
//	strcpy(scratch_directory,getenv("RCAC_SCRATCH"));	// for runs at Purdue
	strcpy(scratch_directory,"/scratch/jwt0024");		// for runs at Auburn on hopper

	if((argc <= 2) || ((argc == 3) && (strcmp(argv[1],"-h") == 0))) {			
		fprintf(stderr,"usage: %s <input_directory> <results_directory> <master_output_directory> <runxxx>\nor\n%s -restart <input_directory> <results_directory> <master_output_directory> <runxxx>\n",argv[0],argv[0]);
		exit(0);
	}
	else if((argc == 5) && (strcmp(argv[1],"-restart") == 0)) {
		strcpy(master_output_directory,argv[3]);
		strcpy(results_directory,argv[4]);
		sprintf(results_directory_full_path,"%s/%s",master_output_directory,results_directory);
		mkdir(results_directory_full_path,0777);
		sprintf(tmp,"%s/t_record_map.dat",results_directory_full_path);
		fp_t = fopen(tmp,"w");
		strcpy(input_directory,argv[2]);	/* default location for input files	*/
		sprintf(restart_maps,"%s/restart_maps.dat",scratch_directory);
		restart_flag = RE_START;
	}
	else if((argc == 4) && (strcmp(argv[1],"-restart") != 0)) {
		strcpy(master_output_directory,argv[2]);
		strcpy(results_directory,argv[3]);
		sprintf(results_directory_full_path,"%s/%s",master_output_directory,results_directory);
		mkdir(results_directory_full_path,0777);
		sprintf(tmp,"%s/t_record_map.dat",results_directory_full_path);
		fp_t = fopen(tmp,"w");
		strcpy(input_directory,argv[1]);	/* default location for input files	*/
	}
	else {
		fprintf(stderr,"unrecognized combination of arguments\n");
		fprintf(stderr,"usage: %s <input_directory> <results_directory> <master_output_directory> <runxxx>\nor\n%s -restart <input_directory> <results_directory> <master_output_directory> <runxxx>\n",argv[0],argv[0]);
		exit(0);
	}
	
/* use a log file to record important parameters and info	*/
	sprintf(tmp,"%s/log_file",results_directory_full_path);
	fp_log = fopen(tmp,"w");
	fprintf(fp_log,"using %s\n",argv[0]);
	fprintf(fp_log,"input data for grid and geometry taken from the directory %s\n",input_directory);
	fprintf(fp_log,"map data written to scratch directory %s\n",scratch_directory);
	fprintf(fp_log,"other output data written to directory %s\n",results_directory_full_path);

#ifdef CLUSTER
// take this out for icc compiler
 	n_threads = omp_get_num_threads();	/* just to check	*/
 	fprintf(fp_log,"number of threads = %d\n",n_threads);
#endif

/* initialize the geometry of the system, including the shape and size of the recorder */
	init_geometry();

	create_arrays();

/* search through geometry file to find all boundary points on the recorder surface and	*/
/* classify them for use in dealing with boundary conditions */
	n_boundary = boundary();
	fprintf(fp_log,"number of boundary points found (on recorder) = %d\n",n_boundary);
	
/* read in values for various parameters from file run_data.dat */
	init_run_data(argv[0]);

/* initialize rho, u, and v */
	init_air();

	fflush(fp_log);

	run();

	fclose(fp_t);
	fclose(fp_log);

	cleanup_memory();
}

init_run_data(char *src_file)
{
	FILE *fp,*fp_restart;
	char tmp[300],mes[400],tmp2[300];	
	int i,j,k;

	strcpy(tmp,"run_data.dat");
	fp = fopen(tmp,"r");
	if(fp == NULL) {
		fprintf(fp_log,"can't open %s for basic run data\n",tmp);
		(void)exit(0);
	}
	
/* put a copy of run_data.dat into results directory */
	sprintf(mes,"cp %s %s/",tmp,results_directory_full_path);
	system(mes);
/* add a copy of job file too - assumed to be named job.pbs	*/
	sprintf(mes,"cp job.pbs %s/",results_directory_full_path);
	system(mes);
/* and a copy of the source code too - assumed to be named src_file.c     */
	sprintf(mes,"cp %s.c %s/",src_file,results_directory_full_path);
	system(mes);

	while(1) {
		if(fgets(tmp,100,fp) == NULL) {
			fprintf(stderr, "error in reading run data file\n");
			fprintf(fp_log, "error in reading run data file\n");
			(void)exit(0);
		}
		if(tmp[0] == '#') break;
	}

	fgets(tmp,100,fp);
	sscanf(tmp,"%lf",&c_s);
	fgets(tmp,100,fp);
	sscanf(tmp,"%lf",&rho_air);
	fgets(tmp,100,fp);
	sscanf(tmp,"%lf",&nu);
	fgets(tmp,100,fp);
	sscanf(tmp,"%lf %lf %lf %lf %s",&Z_rec_1,&Z_rec_2,&Z_bound_1,&Z_bound_2,tmp2);
	if(strcmp(tmp2,"OFF") == 0) {
		boundary_Z_flag = OFF;
	}
	else {
		boundary_Z_flag = ON;
	}
	fgets(tmp,100,fp);
	sscanf(tmp,"%lf",&courant);
	fgets(tmp,100,fp);
	sscanf(tmp,"%lf %lf",&u_init,&t_blow_transient);
	fgets(tmp,100,fp);
	sscanf(tmp,"%lf",&v_init);
	fgets(tmp,100,fp);
	sscanf(tmp,"%lf %lf %lf",&t_end,&t_stop_blowing,&t_ramp_down);
	fgets(tmp,100,fp);
	sscanf(tmp,"%lf %lf %lf %lf",&t_map_start,&t_map_end,&t_record_map,&t_map_ave);
	fgets(tmp,100,fp);
	sscanf(tmp,"%lf",&t_record_sound);
	fgets(tmp,100,fp);
	sscanf(tmp,"%lf",&t_restart_save_interval);
	fgets(tmp,100,fp);
	sscanf(tmp,"%lf %lf %lf",&viscosity_1_0,&viscosity_2_0,&t_viscosity_start);
	fgets(tmp,100,fp);
	sscanf(tmp,"%lf %lf %lf",&t_accum,&t_accum_interval,&t_accum_start);

	dt = courant * dx_min / c_s;
	if(restart_flag == START_FRESH) {
		t = t_start = 0.0;
	}	
	else {
		sprintf(tmp,"t_restart");
		fp_restart = fopen(tmp,"r");
		fscanf(fp_restart,"%lf",&t_start);
		fclose(fp_restart);
		t = t_start;
	}
	Z_rec_1_norm = Z_rec_1 / (c_s * c_s);
	Z_rec_2_norm = Z_rec_2 / (c_s * c_s);
	Z_bound_1_norm = Z_bound_1 / (c_s * c_s);
	Z_bound_2_norm = Z_bound_2 / (c_s * c_s);
	Z_rec_1_2_plus = Z_rec_1_norm + (Z_rec_2_norm/dt);
	Z_rec_2_prime = (Z_rec_2_norm/dt) / Z_rec_1_2_plus;
	Z_bound_1_2_plus = Z_bound_1_norm + (Z_bound_2_norm/dt);
	Z_bound_2_prime = (Z_bound_2_norm/dt) / Z_bound_1_2_plus;

	fprintf(fp_log,"size of simulation region: i(max) = %d\tj(max) = %d\tk(max) = %d\n",i_max,j_max,k_max);
	fprintf(fp_log,"c_s = %g\n",c_s);
	fprintf(fp_log,"rho(air) = %g\n",rho_air);
	fprintf(fp_log,"kinematic viscosity = %g\n",nu);
	fprintf(fp_log,"impedance of recorder surface = %g + i omega (%g)\n",Z_rec_1,Z_rec_2);
	fprintf(fp_log,"impedance of outer boundaries = %g + i omega (%g)\n",Z_bound_1,Z_bound_2);
	if(boundary_Z_flag == ON) {
		fprintf(fp_log,"\tuse these values for impedance of boundaries\n");
	}
	else {
		fprintf(fp_log,"\tclamp rho = u = v = 0 on boundaries\n");
	}
	fprintf(fp_log,"courant factor = %g\n",courant);
	fprintf(fp_log,"dx(min) = %g\tdt = %g\n",dx_min,dt);
	fprintf(fp_log,"u_init = %g\tv_init = %g\n",u_init,v_init);
	fprintf(fp_log,"t(ramp up channel velocity) = %g\n",t_blow_transient);
	fprintf(fp_log,"t(start) = %g\tt(end) = %g\n",t_start,t_end);
	fprintf(fp_log,"t(stop blowing) = %g\tt(ramp blowing to zero) = %g\n",t_stop_blowing,t_ramp_down);
	fprintf(fp_log,"start map recording at %g\ndt(record-maps) = %g\tdt(map ave) = %g\n",t_map_start,t_record_map,t_map_ave);
	fprintf(fp_log,"stop map recording at %g\n",t_map_end);
	fprintf(fp_log,"record rho at (%g,%g,%g), (%g,%g,%g), and (%g,%g,%g)\n",x_grid[i_record[1]],y_grid[j_record[1]],
		z_grid[k_record[1]],x_grid[i_record[2]],y_grid[j_record[2]],z_grid[k_record[2]],x_grid[i_record[3]],y_grid[j_record[3]],z_grid[k_record[3]]);
	fprintf(fp_log,"dt(record-sound) = %g\n",t_record_sound);
	fprintf(fp_log,"dt(record-for-restart) = %g\n",t_restart_save_interval);
	fprintf(fp_log,"artificial viscosity param1= %g and param2 =  %g\n\tturn artificial viscosity on at %g\n",viscosity_1_0,viscosity_2_0,t_viscosity_start);
	fprintf(fp_log,"average map data over time intervals of %lf s\naverage every %lf s, start averaging at %lf s\n",t_accum,t_accum_interval,t_accum_start);

	fclose(fp);

	fprintf(fp_log,"\nx grid info (i, x, dx)\n");
	for(i = 0; i <= i_max; i++) fprintf(fp_log,"%d\t%g\t%g\n",i,x_grid[i],dx_grid[i]);

	fprintf(fp_log,"\ny grid info (i, y, dy)\n");
	for(j = 0; j <= j_max; j++) fprintf(fp_log,"%d\t%g\t%g\n",j,y_grid[j],dy_grid[j]);

	fprintf(fp_log,"\nz grid info (i, z, dz)\n");
	for(k = 0; k <= k_max; k++) fprintf(fp_log,"%d\t%g\t%g\n",k,z_grid[k],dz_grid[k]);

	return;
}

/*
 *	WARNING: this routine assumes that the initialization region (i.e., the blowing region
 *	has v = w = 0 and
 *	u = constant along x and following a dependence along y that is 
 *	Poiselle-like (and read from the file u_init)
 *
 */
init_air()
{
	int i,j,k;
	FILE *fp_map_restart;
	char tmp[400],tmp2[400];
	double val_rho,val_u,val_v,val_w;

	fprintf(fp_log,"corner of initialization region at i = %d\tj = %d\tk = %d, with u(init) = %g\n",i_init_min,j_init_min,k_init_min,u_init); 

	if(restart_flag == START_FRESH) {
		fprintf(fp_log,"starting fresh\n");

		for(i = 0; i <= i_max; i++) {
			for(j = 0; j <= j_max; j++) {
				for(k = 0; k <= k_max; k++) {
					rho[i][j][k] = u[i][j][k] = v[i][j][k] = w[i][j][k] = 0.0;
					rho_predictor[i][j][k] = u_predictor[i][j][k] = v_predictor[i][j][k] = w_predictor[i][j][k] = 0.0;
					if(geometry[i][j][k] == INIT_POINT) {
						u[i][j][k] = u_predictor[i][j][k] = blowing_velocity(0.0,i,j,k);
					}
				}
			}
		}
		for(i = i_init_min; i <= i_init_max; i++) {
			for(j = j_init_min; j <= j_init_max; j++) {
				for(k = k_init_min; k <= k_init_max; k++) {
					if(geometry[i][j][k] != INIT_POINT) {
						fprintf(fp_log,"error B: initializing rho i,j,k (%d %d %d) not in init region\n",i,j,k);
						exit(0);
					}
					rho[i][j][k] = rho[i-1][j][k] + (dx_grid[i-1]*rho_air/(c_s*c_s))*nu*second_deriv_y(u,i,j,k);
				}
			}
		}
	}
	else {		/* making a restart	*/
		sprintf(tmp2,"%s/restart_maps.dat",scratch_directory);
		fp_map_restart = fopen(tmp2,"r");
		while(1) {
			if(fgets(tmp,300,fp_map_restart) == NULL) break;
			sscanf(tmp,"%d %d %d %lf %lf %lf %lf",&i,&j,&k,&val_rho,&val_u,&val_v,&val_w);
			rho[i][j][k] = rho_predictor[i][j][k] = val_rho;
			u[i][j][k] = u_predictor[i][j][k] = val_u;
			v[i][j][k] = v_predictor[i][j][k] = val_v;
			w[i][j][k] = w_predictor[i][j][k] = val_w;
		}
		fclose(fp_map_restart);
	}
	reset_map(rho_buf);	/* zero these buffers prior to start	*/
	reset_map(u_buf);
	reset_map(v_buf);
	reset_map(w_buf);

	return;
}

init_geometry()
{
	int i,j,k,n,m,p;
	FILE *fp_solid,*fp_sound,*fp_initialize,*fp_x_grid,*fp_y_grid,*fp_z_grid,*fp_u_init;
	FILE *fp_surface_normal;
	char tmp[300],tmp2[300];
	int i_rec,j_rec,k_rec;
	char mes[400];
	double x_tmp,y,z;

	dx_min = 1.0;
	sprintf(tmp,"%s/x_grid.dat",input_directory);
	fp_x_grid = fopen(tmp,"r");
	if(fp_x_grid == NULL) {
		fprintf(fp_log,"can't open %s for x grid data\n",tmp);
		exit(0);
	}
//	sprintf(mes,"cp %s %s/",tmp,results_directory_full_path);
//	system(mes);
// no need		/* put a copy of this file into the results directory */
	i = 0;
	while(1) {
		if(fgets(tmp,200,fp_x_grid) == NULL) break;
		sscanf(tmp,"%d %lf %lf",&n,&(x_grid[i]),&(dx_grid[i]));
		if(dx_min > dx_grid[i]) dx_min = dx_grid[i];
		++i;
	}
	i_max = i - 1;
	fclose(fp_x_grid);

	sprintf(tmp,"%s/y_grid.dat",input_directory);
	fp_y_grid = fopen(tmp,"r");
	if(fp_y_grid == NULL) {
		fprintf(fp_log,"can't open %s for y grid data\n",tmp);
		exit(0);
	}
//	sprintf(mes,"cp %s %s/",tmp,results_directory_full_path);
//	system(mes);
//		/* put a copy of this file into the results directory */
	j = 0;
	while(1) {
		if(fgets(tmp,200,fp_y_grid) == NULL) break;
		sscanf(tmp,"%d %lf %lf",&n,&(y_grid[j]),&(dy_grid[j]));
		if(dx_min > dy_grid[j]) dx_min = dy_grid[j];
		++j;
	}
	j_max = j - 1;
	fclose(fp_y_grid);

// now ready for the grid data along z
	sprintf(tmp,"%s/z_grid.dat",input_directory);
	fp_z_grid = fopen(tmp,"r");
	if(fp_z_grid == NULL) {
		fprintf(fp_log,"can't open %s for z grid data\n",tmp);
		exit(0);
	}
//	sprintf(mes,"cp %s %s/",tmp,results_directory_full_path);
//	system(mes);
//		/* put a copy of this file into the results directory */
	k = 0;
	while(1) {
		if(fgets(tmp,200,fp_z_grid) == NULL) break;
		sscanf(tmp,"%d %lf %lf",&n,&(z_grid[k]),&(dz_grid[k]));
		if(dx_min > dz_grid[k]) dx_min = dz_grid[k];
		++k;
	}
	k_max = k - 1;
	fclose(fp_z_grid);

// allocate memory for geometry array - this is its first use
	geometry = allocate_array_of_integers(i_max+2,j_max+2,k_max+2);

	for(i = 0; i <= i_max; i++) {
		for(j = 0; j <= j_max; j++) {
			for(k = 0; k <= k_max; k++) {
				geometry[i][j][k] = OPEN;
			}
		}
	}

	sprintf(tmp,"%s/geometry_solid.dat",input_directory);
	fp_solid = fopen(tmp,"r");
	if(fp_solid == NULL) {
		fprintf(fp_log,"can't open %s for geometry data\n",tmp);
		exit(0);
	}
//	sprintf(mes,"cp %s %s/",tmp,results_directory_full_path);
//	system(mes);
//		/* put a copy of this file into the results directory */

	fprintf(fp_log,"coordinate ranges (x) %d, (y) %d, (z) %d \n",i_max,j_max,k_max);

	while(1) {
		if(fgets(tmp,200,fp_solid) == NULL) break;
		sscanf(tmp,"%d %d %d",&i,&j,&k);
		if((i > i_max) || (j > j_max) || (k > k_max)) {
			fprintf(fp_log,"coordinates %d,%d,%d in geometry_solid.dat out of bounds\n",i,j,k);
			fflush(fp_log);
			exit(0);
		}
		geometry[i][j][k] = SOLID;
	}
	fclose(fp_solid);

	fprintf(fp_log,"coordinate ranges (x) %d, (y) %d, (z) %d \n",i_max,j_max,k_max);

	i_init_min = i_max;
	i_init_max = 0;
	j_init_min = j_max;
	j_init_max = 0;
	k_init_min = k_max;
	k_init_max = 0;
	sprintf(tmp,"%s/geometry_initialize.dat",input_directory);
	fp_initialize = fopen(tmp,"r");
	if(fp_initialize == NULL) {
		fprintf(fp_log,"can't open %s for geometry data\n",tmp);
		exit(0);
	}
	sprintf(mes,"cp %s %s/",tmp,results_directory_full_path);
	system(mes);
		/* put a copy of this file into the results directory */
	while(1) {
		if(fgets(tmp,200,fp_initialize) == NULL) break;
		sscanf(tmp,"%d %d %d",&i,&j,&k);
		if((i > i_max) || (j > j_max) || (k > k_max)) {
			fprintf(fp_log,"coordinates %d,%d,%d in geometry_initialize.dat out of bounds\n",i,j,k);
			fflush(fp_log);
			exit(0);
		}
		geometry[i][j][k] = INIT_POINT;
		if(i < i_init_min) i_init_min = i;
		if(i > i_init_max) i_init_max = i;
		if(j < j_init_min) j_init_min = j;
		if(j > j_init_max) j_init_max = j;
		if(k < k_init_min) k_init_min = k;
		if(k > k_init_max) k_init_max = k;
	}
	fclose(fp_initialize);

	fprintf(fp_log,"coordinate ranges for initialization of u:\n(x) %d-%d, (y) %d-%d, (z) %d-%d \n",i_init_min,i_init_max,j_init_min,j_init_max,k_init_min,k_init_max);

	sprintf(tmp,"%s/geometry_sound.dat",input_directory);
	fp_sound = fopen(tmp,"r");
	if(fp_sound == NULL) {
		fprintf(fp_log,"can't open %s for geometry data\n",tmp);
		fflush(fp_log);
		exit(0);
	}
	sprintf(mes,"cp %s %s/",tmp,results_directory_full_path);
	system(mes); 	/* put a copy of this file into the results directory */

	n = 0;
	while(1) {
		if(fgets(tmp,200,fp_sound) == NULL) break;
		if(n > 20) {
			fprintf(fp_log,"something is wrong in %s\n",tmp);
			fflush(fp_log);
			exit(0);
		}
		++n;
		sscanf(tmp,"%d %d %d",&i,&j,&k);
		if((i >= i_max) || (j >= j_max) || (k >= k_max)) {
			fprintf(fp_log,"coordinates %d,%d,%d in geometry_sound.dat out of bounds\n",i,j,k);
			fflush(fp_log);
			exit(0);
		}
		i_record[n] = i;
		j_record[n] = j;
		k_record[n] = k;
		fprintf(fp_log,"recording point %d is i = %d\tj = %d\tk = %d\n",n,i,j,k);
		n_record_sound = n;
	}
	fclose(fp_sound);

	for(i = 0; i <= i_max; i++) {
		for(k = 0; k <= k_max; k++) {
			geometry[i][0][k] = LOWER_Y_BOUNDARY;
			geometry[i][j_max][k] = UPPER_Y_BOUNDARY;
		}
	}
	for(j = 0; j <= j_max; j++) {
		for(k = 0; k <= k_max; k++) {
			geometry[0][j][k] = LOWER_X_BOUNDARY;
			geometry[i_max][j][k] = UPPER_X_BOUNDARY;
		}
	}
	for(i = 0; i <= i_max; i++) {
		for(j = 0; j <= j_max; j++) {
			geometry[i][j][0] = LOWER_Z_BOUNDARY;
			geometry[i][j][k_max] = UPPER_Z_BOUNDARY;
		}
	}

	sprintf(tmp,"%s/u_init.dat",input_directory);
	fp_u_init = fopen(tmp,"r");
	if(fp_u_init == NULL) {
		fprintf(fp_log,"can't open %s for u initialization data\n",tmp);
		fflush(fp_log);
		exit(0);
	}
	sprintf(mes,"cp %s %s/",tmp,results_directory_full_path);
	system(mes); 		/* put a copy of this file into the results directory */
	for(j = j_init_min; j <= j_init_max; j++) {
		fscanf(fp_u_init,"%lf",&(init_val_j[j]));
	}
// do not follow Poiseuille's law along k
	for(k = k_init_min; k <= k_init_max; k++) {
		init_val_k[k] = 1.0;
	}
	fclose(fp_u_init);

	fprintf(fp_log,"init factors for u(init) in channel\n");
	fprintf(fp_log,"along y direction:\n"); 
	for(j = j_init_min; j <= j_init_max; j++) {
		fprintf(fp_log,"\t%lf",init_val_j[j]);
	}
	fprintf(fp_log,"\n");
	fprintf(fp_log,"along z direction:\n");
	for(k = k_init_min; k <= k_init_max; k++) {
		fprintf(fp_log,"\t%lf",init_val_k[k]);
	}
	fprintf(fp_log,"\n");

// now check for and readin surface normal data
	sprintf(tmp,"%s/surface_point_info.dat",input_directory);
	fp_surface_normal = fopen(tmp,"r");
	if(fp_surface_normal == NULL) {
		surface_normal_flag = NO;
		fprintf(fp_log,"\nNOT using surface normal info\n\n");
	}
	else {
		surface_normal_flag = YES;
		n_surface_points = 0;
		fprintf(fp_log,"\nusing surface normal info from file %s\n\n",tmp);

// /* add a copy of surface normal file too - assumed to be named surface_point_info.dat	*/
// 		sprintf(mes,"cp %s/surface_point_info.dat %s/",input_directory,results_directory_full_path);
// 		system(mes);

		while(1) {
			if((fscanf(fp_surface_normal,"%d %d %d %lf %lf %lf",&i,&j,&k,&x_tmp,&y,&z)) == EOF) break;
			i_surface[n_surface_points] = i;
			j_surface[n_surface_points] = j;
			k_surface[n_surface_points] = k;
			x_hat[n_surface_points] = x_tmp;
			y_hat[n_surface_points] = y;
			z_hat[n_surface_points] = z;
			boundary_flag[n_surface_points] = CORNER;
			if(x_hat[n_surface_points] > 0.9) boundary_flag[n_surface_points] = XUP;
			if(x_hat[n_surface_points] < -0.9) boundary_flag[n_surface_points] = XDOWN;
			if(y_hat[n_surface_points] > 0.9) boundary_flag[n_surface_points] = YUP;
			if(y_hat[n_surface_points] < -0.9) boundary_flag[n_surface_points] = YDOWN;
			if(z_hat[n_surface_points] > 0.9) boundary_flag[n_surface_points] = ZUP;
			if(z_hat[n_surface_points] < -0.9) boundary_flag[n_surface_points] = ZDOWN;

			++n_surface_points;
		}
		fclose(fp_surface_normal);
	}
	
	return;
}

// ??? start editing here

/*
 *	do the calculation here
 *	use explicit MacCormack method
 *
 *	results for rho vs time at three locations are also stored
 *	along with map data for rho, u, v, and w if desired
 */
run()
{
	int i,j,k,k_map,k2,m,n_record,n_record_2,n_file_num,p;
	FILE *fp_rho_t[22];
	char tmp1[300],tmp2[300],tmp3[300],tmp4[300];
	double t_next_map;	/*	time at which to save next set of maps	*/
	double t_next_record;	/*	time to record next map			*/
	double t_next_restart_save;
	double viscosity_1,viscosity_2;
	int n_threads;
	double current_t,start_t;
	int n_benchmark;
	double t_next_accum,t_accum_end;
	int n_accum_buffer_count;
	int accum_flag;
	int nan_flag = NO;
	double rho_max,x_max,y_max,z_max;
	double vel_perp;

	for(i = 1; i <= n_record_sound; i++) {
		sprintf(tmp1,"%s/rho_time_%d",results_directory_full_path,i);
		fp_rho_t[i] = fopen(tmp1,"w");
	}

	n_record_2 = t_record_sound / dt;
	t_next_record = t_map_start;
	t_next_map = t_next_record - t_map_ave;
	n_record = t_record_map / (2 *  dt);
	n_record_2 = t_record_sound / (2 *  dt);
	t_next_restart_save = t + t_restart_save_interval;

	if(restart_flag == RE_START) {
		if(t >= t_accum_start) {
			t_next_accum = t + t_accum_interval;
		}
		else {
			t_next_accum = t_accum_start;
		}
	}	
	else {
		t_next_accum = t_accum_start;
	}
	t_accum_end = t_next_accum + t_accum;

	zero_filtering_data();
	n_accum_buffer_count = 0;
	accum_flag = NO;

	n_file_num = 1;
	k_map = 0;
	k2 = 0;
	for(i = 1; i <= n_record_sound; i++) {
		rho_t_sum[i] = 0.0;
	}
	t_sum = 0.0;
	viscosity_1 = viscosity_2 = 0.0;
// new in version 8a
	if(t >= t_viscosity_start) {
		viscosity_1 = viscosity_1_0;
		viscosity_2 = viscosity_2_0;
	}

	if(restart_flag != RE_START) {
		for(i = 1; i <= n_record_sound; i++) {
			fprintf(fp_rho_t[i],"%g\t%g\n",t_sum,rho[i_record[i]][j_record[i]][k_record[i]]);
			fflush(fp_rho_t[i]);
		}
	}
	n_threads = 0;

#ifdef CLUSTER
#pragma omp parallel 
{
// take out for icc
 n_threads = omp_get_num_threads();	/* just to check	*/
 if(omp_get_thread_num() == 1) {
 	fprintf(fp_log,"number of threads = %d\n",n_threads);
 	fflush(fp_log);
 }
}
#endif

#ifdef BENCHMARK
n_benchmark = 0;
start_t = current_t = omp_get_wtime();
fprintf(fp_log,"real start to calc\tt = %g\n",current_t);
#endif

  while(t <= t_end) {
{

#ifdef BENCHMARK
start_t = current_t = omp_get_wtime();
fprintf(fp_log,"start first mac loop\tt = %g\n",current_t);
#endif

// fprintf(fp_log,"start first mac loop\tt = %g\n",current_t);

#ifdef CLUSTER
#pragma omp collapse(3)
#pragma omp parallel for shared(u,v,w,rho,u_predictor,v_predictor,w_predictor,rho_predictor)  private(i,j,k) 
#endif

	for(i = 1; i <= i_max-1; i++) {
		for(j = 1; j <= j_max-1; j++) {
			for(k = 1; k <= k_max-1; k++) {
/* for interior points, away from any boundaries			 */
/* do predictor step */
				if(geometry[i][j][k] == OPEN) {
			  	rho_predictor[i][j][k] = rho[i][j][k]
				  - (dt/dx_grid[i]) * u[i][j][k] * (rho[i+1][j][k] - rho[i][j][k])
				  - (dt/dx_grid[i]) * rho_air * (u[i+1][j][k] - u[i][j][k])
				  - (dt/dy_grid[j]) * v[i][j][k] * (rho[i][j+1][k] - rho[i][j][k])
				  - (dt/dy_grid[j]) * rho_air * (v[i][j+1][k] - v[i][j][k])
				  - (dt/dz_grid[k]) * w[i][j][k] * (rho[i][j][k+1] - rho[i][j][k])
				  - (dt/dz_grid[k]) * rho_air * (w[i][j][k+1] - w[i][j][k])
				  + artificial_viscosity(i,j,k,rho,rho,viscosity_1,viscosity_2);

				  u_predictor[i][j][k] = u[i][j][k] 
				  - (dt/dx_grid[i]) * u[i][j][k] * (u[i+1][j][k] - u[i][j][k])
				  - (dt/dy_grid[j]) * v[i][j][k] * (u[i][j+1][k] - u[i][j][k])
				  - (dt/dz_grid[k]) * w[i][j][k] * (u[i][j][k+1] - u[i][j][k])
				  -(c_s*c_s/rho_air)*(dt/dx_grid[i]) * (rho[i+1][j][k] - rho[i][j][k])
				  +nu*dt*(second_deriv_x(u,i,j,k)+second_deriv_y(u,i,j,k)+second_deriv_z(u,i,j,k))
				  + artificial_viscosity(i,j,k,u,rho,viscosity_1,viscosity_2);

				  v_predictor[i][j][k] = v[i][j][k] 
				  - (dt/dx_grid[i]) * u[i][j][k] * (v[i+1][j][k] - v[i][j][k])
				  - (dt/dy_grid[j]) * v[i][j][k] * (v[i][j+1][k] - v[i][j][k])
				  - (dt/dz_grid[k]) * w[i][j][k] * (v[i][j][k+1] - v[i][j][k])
				  -(c_s*c_s/rho_air)*(dt/dy_grid[j]) * (rho[i][j+1][k] - rho[i][j][k])
				  +nu*dt*(second_deriv_x(v,i,j,k)+second_deriv_y(v,i,j,k)+second_deriv_z(v,i,j,k))
				  + artificial_viscosity(i,j,k,v,rho,viscosity_1,viscosity_2);

				  w_predictor[i][j][k] = w[i][j][k] 
				  - (dt/dx_grid[i]) * u[i][j][k] * (w[i+1][j][k] - w[i][j][k])
				  - (dt/dy_grid[j]) * v[i][j][k] * (w[i][j+1][k] - w[i][j][k])
				  - (dt/dz_grid[k]) * w[i][j][k] * (w[i][j][k+1] - w[i][j][k])
				  -(c_s*c_s/rho_air)*(dt/dz_grid[k]) * (rho[i][j][k+1] - rho[i][j][k])
				  +nu*dt*(second_deriv_x(w,i,j,k)+second_deriv_y(w,i,j,k)+second_deriv_z(w,i,j,k))
// fixed this error 6-6-12 v8b	  + artificial_viscosity(i,j,k,v,viscosity_1,viscosity_2);
				  + artificial_viscosity(i,j,k,w,rho,viscosity_1,viscosity_2);
				}
			}
		}
	}
}

#ifdef BENCHMARK
current_t = omp_get_wtime();
fprintf(fp_log,"finished first mac loop (1)\tt = %g\n",current_t - start_t);
#endif

// save_for_restart();
// my_exit();

// fprintf(fp_log,"finished first mac loop (1)\tt = %g\n",current_t - start_t);

{

if(surface_normal_flag == NO) {

#ifdef CLUSTER
#pragma omp parallel for shared(u,v,w,rho,u_predictor,v_predictor,w_predictor,rho_predictor) private(i,j,k,m)
#endif

/* next deal with points on the surface of the recorder - these have an acoustic impedance Z */
	for(m = 0; m < n_boundary; m++) {
		i = x_boundary[m];
		j = y_boundary[m];
		k = z_boundary[m];
		if(boundary_flag[m] == XUP) {
			v_predictor[i][j][k] = w_predictor[i][j][k] = 0.0;
			rho_predictor[i][j][k] = rho[i][j][k]
						-(dt/dx_grid[i])*rho_air*(u[i+1][j][k] - u[i][j][k])
						-(dt/dx_grid[i])*u[i][j][k]*(rho[i+1][j][k] - rho[i][j][k])
                                + artificial_viscosity_on_recorder(m,i,j,k,viscosity_1,viscosity_2);
			u_predictor[i][j][k] = u[i][j][k] * Z_rec_2_prime + rho_predictor[i][j][k] / Z_rec_1_2_plus;
		}
		if(boundary_flag[m] == XDOWN) {
			v_predictor[i][j][k] = w_predictor[i][j][k] = 0.0;
			rho_predictor[i][j][k] = rho[i][j][k]
						-(dt/dx_grid[i-1])*rho_air*(u[i][j][k] - u[i-1][j][k])
						-(dt/dx_grid[i-1])*u[i][j][k]*(rho[i][j][k] - rho[i-1][j][k])
                                + artificial_viscosity_on_recorder(m,i,j,k,viscosity_1,viscosity_2);
			u_predictor[i][j][k] = u[i][j][k] * Z_rec_2_prime + rho_predictor[i][j][k] / Z_rec_1_2_plus;
		}
		if(boundary_flag[m] == YUP) {
			u_predictor[i][j][k] = w_predictor[i][j][k] = 0.0;
			rho_predictor[i][j][k] = rho[i][j][k]
						-(dt/dy_grid[j])*rho_air*(v[i][j+1][k] - v[i][j][k])
						-(dt/dy_grid[j])*v[i][j][k]*(rho[i][j+1][k] - rho[i][j][k])
                                + artificial_viscosity_on_recorder(m,i,j,k,viscosity_1,viscosity_2);
			v_predictor[i][j][k] = v[i][j][k] * Z_rec_2_prime + rho_predictor[i][j][k] / Z_rec_1_2_plus;
		}
		if(boundary_flag[m] == YDOWN) {
			u_predictor[i][j][k] = w_predictor[i][j][k] = 0.0;
			rho_predictor[i][j][k] = rho[i][j][k]
						-(dt/dy_grid[j-1])*rho_air*(v[i][j][k] - v[i][j-1][k])
						-(dt/dy_grid[j-1])*v[i][j][k]*(rho[i][j][k] - rho[i][j-1][k])
                                + artificial_viscosity_on_recorder(m,i,j,k,viscosity_1,viscosity_2);
			v_predictor[i][j][k] = v[i][j][k] * Z_rec_2_prime + rho_predictor[i][j][k] / Z_rec_1_2_plus;
		}
		if(boundary_flag[m] == ZUP) {
			u_predictor[i][j][k] = v_predictor[i][j][k] = 0.0;
			rho_predictor[i][j][k] = rho[i][j][k]
						-(dt/dz_grid[k])*rho_air*(w[i][j][k+1] - w[i][j][k])
						-(dt/dz_grid[k])*w[i][j][k]*(rho[i][j][k+1] - rho[i][j][k])
                                + artificial_viscosity_on_recorder(m,i,j,k,viscosity_1,viscosity_2);
			w_predictor[i][j][k] = w[i][j][k] * Z_rec_2_prime + rho_predictor[i][j][k] / Z_rec_1_2_plus;
		}
		if(boundary_flag[m] == ZDOWN) {
			u_predictor[i][j][k] = v_predictor[i][j][k] = 0.0;
			rho_predictor[i][j][k] = rho[i][j][k]
						-(dt/dz_grid[k-1])*rho_air*(w[i][j][k] - w[i][j][k-1])
						-(dt/dz_grid[k-1])*w[i][j][k]*(rho[i][j][k] - rho[i][j][k-1])
                                + artificial_viscosity_on_recorder(m,i,j,k,viscosity_1,viscosity_2);
			w_predictor[i][j][k] = w[i][j][k] * Z_rec_2_prime + rho_predictor[i][j][k] / Z_rec_1_2_plus;
		}
	}
}
else {

// 7e
// save_predictor();
// my_exit();

/* next deal with points on the surface of the recorder using the surface normal info 
							- these have an acoustic impedance Z */
#ifdef CLUSTER
#pragma omp parallel for shared(u,v,w,rho,u_predictor,v_predictor,w_predictor,rho_predictor) private(i,j,k,m)
#endif

	for(m = 0; m < n_surface_points; m++) {
		i = i_surface[m];
		j = j_surface[m];
		k = k_surface[m];
		rho_predictor[i][j][k] = rho[i][j][k] + artificial_viscosity_on_recorder(m,i,j,k,viscosity_1,viscosity_2);

		if(x_hat[m] > 0.1) {
			rho_predictor[i][j][k] += 
				-(dt/dx_grid[i])*rho_air*(u[i+1][j][k] - u[i][j][k])
				-(dt/dx_grid[i])*u[i][j][k]*(rho[i+1][j][k] - rho[i][j][k]);
		}
		else if(x_hat[m] < -0.1) {
			rho_predictor[i][j][k] += 
				-(dt/dx_grid[i-1])*rho_air*(u[i][j][k] - u[i-1][j][k])
				-(dt/dx_grid[i-1])*u[i][j][k]*(rho[i][j][k] - rho[i-1][j][k]);
		}
// fixed a bug in the next 4 if statements -- used index i for all the grid indices
// fixed in v. 8.3
		if(y_hat[m] > 0.1) {
			rho_predictor[i][j][k] += 
				-(dt/dy_grid[j])*rho_air*(v[i][j+1][k] - v[i][j][k])
				-(dt/dy_grid[j])*v[i][j][k]*(rho[i][j+1][k] - rho[i][j][k]);
		}
		else if(y_hat[m] < -0.1) {
			rho_predictor[i][j][k] += 
				-(dt/dy_grid[j-1])*rho_air*(v[i][j][k] - v[i][j-1][k])
				-(dt/dy_grid[j-1])*v[i][j][k]*(rho[i][j][k] - rho[i][j-1][k]);
		}
		if(z_hat[m] > 0.1) {
			rho_predictor[i][j][k] += 
				-(dt/dz_grid[k])*rho_air*(w[i][j][k+1] - w[i][j][k])
				-(dt/dz_grid[k])*w[i][j][k]*(rho[i][j][k+1] - rho[i][j][k]);
		}
		else if(z_hat[m] < -0.1) {
			rho_predictor[i][j][k] += 
				-(dt/dz_grid[k-1])*rho_air*(w[i][j][k] - w[i][j][k-1])
				-(dt/dz_grid[k-1])*w[i][j][k]*(rho[i][j][k] - rho[i][j][k-1]);
		}

		vel_perp = rho_predictor[i][j][k] / Z_rec_1_2_plus;
		u_predictor[i][j][k] = u[i][j][k] * Z_rec_2_prime + vel_perp * x_hat[m]; 
		v_predictor[i][j][k] = v[i][j][k] * Z_rec_2_prime + vel_perp * y_hat[m]; 
		w_predictor[i][j][k] = w[i][j][k] * Z_rec_2_prime + vel_perp * z_hat[m]; 

	}

// 7b
// save_for_restart();
// 
// my_exit();

// 7i
// save_predictor();
// my_exit();


/*
	for(m = 0; m < n_surface_points; m++) {
		i = i_surface[m];
		j = j_surface[m];
		k = k_surface[m];
		rho_predictor[i][j][k] = rho[i][j][k]
					-(dt/dx_grid[i])*rho_air*(u[i+1][j][k] - u[i][j][k])
					-(dt/dx_grid[i])*u[i][j][k]*(rho[i+1][j][k] - rho[i][j][k])
					-(dt/dy_grid[i])*rho_air*(v[i+1][j][k] - v[i][j][k])
					-(dt/dy_grid[i])*v[i][j][k]*(rho[i+1][j][k] - rho[i][j][k])
					-(dt/dz_grid[i])*rho_air*(w[i+1][j][k] - w[i][j][k])
					-(dt/dz_grid[i])*w[i][j][k]*(rho[i+1][j][k] - rho[i][j][k])
                               + artificial_viscosity_on_recorder(m,i,j,k,viscosity_1,viscosity_2);
		vel_perp = rho_predictor[i][j][k] / Z_rec_1_2_plus;
		u_predictor[i][j][k] = u[i][j][k] * Z_rec_2_prime + vel_perp * x_hat[m]; 
		v_predictor[i][j][k] = v[i][j][k] * Z_rec_2_prime + vel_perp * y_hat[m]; 
		w_predictor[i][j][k] = w[i][j][k] * Z_rec_2_prime + vel_perp * z_hat[m]; 
	}
*/
}

/* next deal with points on the boundaries - 
	these have an acoustic impedance Z_bound = Z_bound_1 + i omega Z_bound_2 
   note that we don't have to worry about the corners
*/
	if(boundary_Z_flag == ON) {

#ifdef CLUSTER
#pragma omp collapse(2)
#pragma omp parallel for shared(u,v,w,rho,u_predictor,v_predictor,w_predictor,rho_predictor)  private(i,j,k) 
#endif
		for(j = 1; j <= j_max - 1; j++) {	// boundary plane at x = 0 
			for(k = 1; k <= k_max - 1; k++) {	
				v_predictor[0][j][k] = w_predictor[0][j][k] = 0.0;
				rho_predictor[0][j][k] = rho[0][j][k] 
							-(dt/dx_grid[0])*rho_air*(u[1][j][k] - u[0][j][k])
							-(dt/dx_grid[0])*u[0][j][k]*(rho[1][j][k] - rho[0][j][k])
               	                 + artificial_viscosity_on_boundary(0,j,k,viscosity_1,viscosity_2);
				u_predictor[0][j][k] 
				= u[0][j][k]*Z_bound_2_prime+rho_predictor[0][j][k]/Z_bound_1_2_plus;
			}
		}
#ifdef CLUSTER
#pragma omp collapse(2)
#pragma omp parallel for shared(u,v,w,rho,u_predictor,v_predictor,w_predictor,rho_predictor)  private(i,j,k) 
#endif
		for(j = 1; j <= j_max - 1; j++) {	// boundary plane at x = x_max
			for(k = 1; k <= k_max - 1; k++) {	
				v_predictor[i_max][j][k] = w_predictor[i_max][j][k] = 0.0;
				rho_predictor[i_max][j][k] = rho[i_max][j][k]
					-(dt/dx_grid[i_max-1])*rho_air*(u[i_max][j][k]-u[i_max-1][j][k])
					-(dt/dx_grid[i_max-1])*u[i_max][j][k]*(rho[i_max][j][k]-rho[i_max-1][j][k])
               	                 + artificial_viscosity_on_boundary(i_max,j,k,viscosity_1,viscosity_2);
				u_predictor[i_max][j][k] 
				= u[i_max][j][k]*Z_bound_2_prime+rho_predictor[i_max][j][k]/Z_bound_1_2_plus;
			}
		}
#ifdef CLUSTER
#pragma omp collapse(2)
#pragma omp parallel for shared(u,v,w,rho,u_predictor,v_predictor,w_predictor,rho_predictor)  private(i,j,k) 
#endif
		for(i = 1; i <= i_max - 1; i++) {	// boundary plane at y = 0
			for(k = 1; k <= k_max - 1; k++) {	
				u_predictor[i][0][k] = w_predictor[i][0][k] = 0.0;
				rho_predictor[i][0][k] = rho[i][0][k]
							-(dt/dy_grid[0])*rho_air*(v[i][1][k] - v[i][0][k])
							-(dt/dy_grid[0])*v[i][0][k]*(rho[i][1][k] - rho[i][0][k])
               	                 + artificial_viscosity_on_boundary(i,0,k,viscosity_1,viscosity_2);
				v_predictor[i][0][k] = 
				v[i][0][k] * Z_bound_2_prime + rho_predictor[i][0][k] / Z_bound_1_2_plus;
			}
		}
#ifdef CLUSTER
#pragma omp collapse(2)
#pragma omp parallel for shared(u,v,w,rho,u_predictor,v_predictor,w_predictor,rho_predictor)  private(i,j,k) 
#endif
		for(i = 1; i <= i_max - 1; i++) {	// boundary plane at y = y_max
			for(k = 1; k <= k_max - 1; k++) {	
				u_predictor[i][j_max][k] = w_predictor[i][j_max][k] = 0.0;
				rho_predictor[i][j_max][k] = rho[i][j_max][k]
					-(dt/dy_grid[j_max-1])*rho_air*(v[i][j_max][k] - v[i][j_max-1][k])
					-(dt/dy_grid[j_max-1])*v[i][j_max][k]*(rho[i][j_max][k]-rho[i][j_max-1][k])
               	                 + artificial_viscosity_on_boundary(i,j_max,k,viscosity_1,viscosity_2);
				v_predictor[i][j_max][k] = 
				v[i][j_max][k] * Z_bound_2_prime + rho_predictor[i][j_max][k] / Z_bound_1_2_plus;
			}
		}
#ifdef CLUSTER
#pragma omp collapse(2)
#pragma omp parallel for shared(u,v,w,rho,u_predictor,v_predictor,w_predictor,rho_predictor)  private(i,j,k) 
#endif
		for(i = 1; i <= i_max - 1; i++) {	// boundary plane at z = 0
			for(j = 1; j <= j_max - 1; j++) {	
				u_predictor[i][j][0] = v_predictor[i][j][0] = 0.0;
				rho_predictor[i][j][0] = rho[i][j][0]
							-(dt/dz_grid[0])*rho_air*(w[i][j][1] - w[i][j][0])
							-(dt/dz_grid[0])*w[i][j][0]*(rho[i][j][1] - rho[i][j][0])
               	                 + artificial_viscosity_on_boundary(i,j,0,viscosity_1,viscosity_2);
				w_predictor[i][j][0] = 
				w[i][j][0] * Z_bound_2_prime + rho_predictor[i][j][0] / Z_bound_1_2_plus;
			}
		}
#ifdef CLUSTER
#pragma omp collapse(2)
#pragma omp parallel for shared(u,v,w,rho,u_predictor,v_predictor,w_predictor,rho_predictor)  private(i,j,k) 
#endif
		for(i = 1; i <= i_max - 1; i++) {	// boundary plane at z = z_max
			for(j = 1; j <= j_max - 1; j++) {	
				u_predictor[i][j][k_max] = v_predictor[i][j][k_max] = 0.0;
				rho_predictor[i][j][k_max] = rho[i][j][k_max]
					-(dt/dz_grid[k_max-1])*rho_air*(w[i][j][k_max] - w[i][j][k_max-1])
					-(dt/dz_grid[k_max-1])*w[i][j][k_max]*(rho[i][j][k_max]-rho[i][j][k_max-1])
               	                 + artificial_viscosity_on_boundary(i,j,k_max,viscosity_1,viscosity_2);
				w_predictor[i][j][k_max] = 
				w[i][j][k_max] * Z_bound_2_prime + rho_predictor[i][j][k_max] / Z_bound_1_2_plus;
			}
		}
	}
}


/* now deal with rho in region where u is held constant	*/
	for(i = i_init_min; i <= i_init_max; i++) {
		for(j = j_init_min; j <= j_init_max; j++) {
			for(k = k_init_min; k <= k_init_max; k++) {
				rho_predictor[i][j][k] = rho_predictor[i-1][j][k] + (dx_grid[i-1]*rho_air/(c_s*c_s))*nu*second_deriv_y(u_predictor,i,j,k);
			}
		}
	}

#ifdef BENCHMARK
current_t = omp_get_wtime();
fprintf(fp_log,"starting second mac loop (2)\tt = %g\n",current_t - start_t);
#endif

// fprintf(fp_log,"starting second mac loop (2)\tt = %g\n",current_t - start_t);
// fflush(fp_log);

{

#ifdef CLUSTER
#pragma omp collapse(3)
#pragma omp parallel for shared(rho,u,v,w,u_predictor,v_predictor,w_predictor,rho_predictor) private(i,j,k)
#endif

	for(i = 1; i <= i_max-1; i++) {
		for(j = 1; j <= j_max-1; j++) {
			for(k = 1; k <= k_max-1; k++) {
/* for interior points, away from any boundaries			 */
/* next do corrector step */
				if(geometry[i][j][k] == OPEN) {

			  rho[i][j][k] = 0.5 * (rho[i][j][k] + rho_predictor[i][j][k] 
-(dt/dx_grid[i-1]) * rho_air * (u_predictor[i][j][k] - u_predictor[i-1][j][k])  
-(dt/dx_grid[i-1]) * u_predictor[i][j][k] * (rho_predictor[i][j][k] - rho_predictor[i-1][j][k])
-(dt/dy_grid[j-1]) * rho_air * (v_predictor[i][j][k] - v_predictor[i][j-1][k])
-(dt/dy_grid[j-1]) * v_predictor[i][j][k] * (rho_predictor[i][j][k] - rho_predictor[i][j-1][k])
-(dt/dz_grid[k-1]) * rho_air * (w_predictor[i][j][k] - w_predictor[i][j][k-1])
-(dt/dz_grid[k-1]) * w_predictor[i][j][k] * (rho_predictor[i][j][k] - rho_predictor[i][j][k-1])
			  + artificial_viscosity(i,j,k,rho_predictor,rho_predictor,viscosity_1,viscosity_2));
// found a bug here and in other places involving the predictors -- they were not 
// multiplied by the 0.5 factor -- this means the viscosity factor was a factor
// of 2 higher for this step in the algorithm - may not matter, but it might

			  u[i][j][k] = 0.5 * (u[i][j][k] + u_predictor[i][j][k]
-(dt/dx_grid[i-1]) * u_predictor[i][j][k] * (u_predictor[i][j][k] - u_predictor[i-1][j][k]) 
-(dt/dy_grid[j-1]) * v_predictor[i][j][k] * (u_predictor[i][j][k] - u_predictor[i][j-1][k]) 
-(dt/dz_grid[k-1]) * w_predictor[i][j][k] * (u_predictor[i][j][k] - u_predictor[i][j][k-1]) 
-(c_s*c_s/rho_air)*(dt/dx_grid[i-1]) * (rho_predictor[i][j][k] - rho_predictor[i-1][j][k])
+nu*dt*(second_deriv_x(u_predictor,i,j,k) + second_deriv_y(u_predictor,i,j,k)+second_deriv_z(u_predictor,i,j,k))
			  + artificial_viscosity(i,j,k,u_predictor,rho_predictor,viscosity_1,viscosity_2));
	
			  v[i][j][k] = 0.5 * (v[i][j][k] + v_predictor[i][j][k]
-(dt/dx_grid[i-1]) * u_predictor[i][j][k] * (v_predictor[i][j][k] - v_predictor[i-1][j][k]) 
-(dt/dy_grid[j-1]) * v_predictor[i][j][k] * (v_predictor[i][j][k] - v_predictor[i][j-1][k]) 
-(dt/dz_grid[k-1]) * w_predictor[i][j][k] * (v_predictor[i][j][k] - v_predictor[i][j][k-1]) 
-(c_s*c_s/rho_air)*(dt/dy_grid[j-1]) * (rho_predictor[i][j][k] - rho_predictor[i][j-1][k])
+nu*dt*(second_deriv_x(v_predictor,i,j,k) + second_deriv_y(v_predictor,i,j,k)+second_deriv_z(v_predictor,i,j,k))
			  + artificial_viscosity(i,j,k,v_predictor,rho_predictor,viscosity_1,viscosity_2));
	
			  w[i][j][k] = 0.5 * (w[i][j][k] + w_predictor[i][j][k]
-(dt/dx_grid[i-1]) * u_predictor[i][j][k] * (w_predictor[i][j][k] - w_predictor[i-1][j][k]) 
-(dt/dy_grid[j-1]) * v_predictor[i][j][k] * (w_predictor[i][j][k] - w_predictor[i][j-1][k]) 
-(dt/dz_grid[k-1]) * w_predictor[i][j][k] * (w_predictor[i][j][k] - w_predictor[i][j][k-1]) 
-(c_s*c_s/rho_air)*(dt/dz_grid[k-1]) * (rho_predictor[i][j][k] - rho_predictor[i][j][k-1])
+nu*dt*(second_deriv_x(w_predictor,i,j,k) + second_deriv_y(w_predictor,i,j,k)+second_deriv_z(w_predictor,i,j,k))
			  + artificial_viscosity(i,j,k,w_predictor,rho_predictor,viscosity_1,viscosity_2));
				}
			}
		}
	}
}

// 7c
// save_for_restart();
// my_exit();

#ifdef BENCHMARK
current_t = omp_get_wtime();
fprintf(fp_log,"finished second mac loop (3)\tt = %g\n",current_t - start_t);
#endif
// fprintf(fp_log,"finished second mac loop (3)\tt = %g\n",current_t - start_t);
// fflush(fp_log);

{

/* finally - update points on recorder */
#ifdef CLUSTER
#pragma omp parallel for shared(u,v,w,rho,u_predictor,v_predictor,w_predictor,rho_predictor) private(i,j,k,m)
#endif

        for(m = 0; m < n_boundary; m++) {
                i = x_boundary[m];
                j = y_boundary[m];
                k = z_boundary[m];
                u[i][j][k] = u_predictor[i][j][k];
                v[i][j][k] = v_predictor[i][j][k];
                w[i][j][k] = w_predictor[i][j][k];
                rho[i][j][k] = rho_predictor[i][j][k];
        }

/* update points on the boundaries - note that we don't have to worry about the corners
*/

        if(boundary_Z_flag == ON) {

#ifdef CLUSTER
#pragma omp collapse(2)
#pragma omp parallel for shared(u,v,w,rho,u_predictor,v_predictor,w_predictor,rho_predictor)  private(i,j,k) 
#endif
                for(j = 1; j <= j_max - 1; j++) {       // x = 0 plane
                	for(k = 1; k <= k_max - 1; k++) { 
                                rho[0][j][k] = rho_predictor[0][j][k];
                                u[0][j][k] = u_predictor[0][j][k];
                                v[0][j][k] = v_predictor[0][j][k];
                                w[0][j][k] = w_predictor[0][j][k];
			}
                }
#ifdef CLUSTER
#pragma omp collapse(2)
#pragma omp parallel for shared(u,v,w,rho,u_predictor,v_predictor,w_predictor,rho_predictor)  private(i,j,k) 
#endif
                for(j = 1; j <= j_max - 1; j++) {       // x = x_max plane
                	for(k = 1; k <= k_max - 1; k++) { 
                                rho[i_max][j][k] = rho_predictor[i_max][j][k];
                                u[i_max][j][k] = u_predictor[i_max][j][k];
                                v[i_max][j][k] = v_predictor[i_max][j][k];
                                w[i_max][j][k] = w_predictor[i_max][j][k];
			}
                }
#ifdef CLUSTER
#pragma omp collapse(2)
#pragma omp parallel for shared(u,v,w,rho,u_predictor,v_predictor,w_predictor,rho_predictor)  private(i,j,k) 
#endif
                for(i = 1; i <= i_max - 1; i++) {       // y = 0 plane
                	for(k = 1; k <= k_max - 1; k++) { 
                                rho[i][0][k] = rho_predictor[i][0][k];
                                v[i][0][k] = v_predictor[i][0][k];
                                u[i][0][k] = u_predictor[i][0][k];
                                w[i][0][k] = w_predictor[i][0][k];
			}
                }
#ifdef CLUSTER
#pragma omp collapse(2)
#pragma omp parallel for shared(u,v,w,rho,u_predictor,v_predictor,w_predictor,rho_predictor)  private(i,j,k) 
#endif
                for(i = 1; i <= i_max - 1; i++) {       // y = y_max plane
                	for(k = 1; k <= k_max - 1; k++) { 
                                rho[i][j_max][k] = rho_predictor[i][j_max][k];
                                v[i][j_max][k] = v_predictor[i][j_max][k];
                                u[i][j_max][k] = u_predictor[i][j_max][k];
                                w[i][j_max][k] = w_predictor[i][j_max][k];
			}
                }
#ifdef CLUSTER
#pragma omp collapse(2)
#pragma omp parallel for shared(u,v,w,rho,u_predictor,v_predictor,w_predictor,rho_predictor)  private(i,j,k) 
#endif
                for(i = 1; i <= i_max - 1; i++) {       // z = 0 plane
                	for(j = j; j <= j_max - 1; j++) { 
                                rho[i][j][0] = rho_predictor[i][j][0];
                                w[i][j][0] = w_predictor[i][j][0];
                                u[i][j][0] = u_predictor[i][j][0];
                                v[i][j][0] = v_predictor[i][j][0];
			}
                }
#ifdef CLUSTER
#pragma omp collapse(2)
#pragma omp parallel for shared(u,v,w,rho,u_predictor,v_predictor,w_predictor,rho_predictor)  private(i,j,k) 
#endif
                for(i = 1; i <= i_max - 1; i++) {       // z = z_max plane
                	for(j = 1; j <= j_max - 1; j++) { 
                                rho[i][j][k_max] = rho_predictor[i][j][k_max];
                                w[i][j][k_max] = w_predictor[i][j][k_max];
                                u[i][j][k_max] = u_predictor[i][j][k_max];
                                v[i][j][k_max] = v_predictor[i][j][k_max];
			}
                }
        }
}

/* now deal with rho in region where u is held constant	*/
	for(i = i_init_min; i <= i_init_max; i++) {
		for(j = j_init_min; j <= j_init_max; j++) {
			for(k = k_init_min; k <= k_init_max; k++) {
				rho[i][j][k] = rho[i-1][j][k] + (dx_grid[i-1]*rho_air/(c_s*c_s))*nu*second_deriv_y(u,i,j,k);
			}
		}
	}

	if(accum_flag == YES) {
		accum_filtering_data();
		++n_accum_buffer_count;
	}

#ifdef CLUSTER
#pragma omp collapse(3)
#pragma omp parallel for shared(u,v,w,rho,u_predictor,v_predictor,w_predictor,rho_predictor)  private(i,j,k) 
#endif
        for(i = 1; i <= i_max-1; i++) {
                for(j = 1; j <= j_max-1; j++) {
                        for(k = 1; k <= k_max-1; k++) {
                                if(fabs(rho[i][j][k]) > 1e3) {
                                        fprintf(fp_log,"(a) rho too large %g\t%d\t%d\t%d\n",rho[i][j][k],i,j,k);
                                        nan_flag = YES;
                                }
                        }
                }
        }
        if(nan_flag == YES) {
                record_data2(n_file_num);
                exit(0);
        }
/* */

/* now do MacCormack again but this time lead with backward derivatives */
{

#ifdef BENCHMARK
current_t = omp_get_wtime();
fprintf(fp_log,"starting third mac loop (4)\tt = %g\n",current_t - start_t);
#endif
// fprintf(fp_log,"starting third mac loop (4)\tt = %g\n",current_t - start_t);
// fflush(fp_log);

#ifdef CLUSTER
#pragma omp collapse(3)
#pragma omp parallel for shared(u,v,w,rho,u_predictor,v_predictor,w_predictor,rho_predictor) private(i,j,k)
#endif

	for(i = 1; i <= i_max-1; i++) {
		for(j = 1; j <= j_max-1; j++) {
			for(k = 1; k <= k_max-1; k++) {
/* for interior points, away from any boundaries			 */
				if(geometry[i][j][k] == OPEN) {
				  rho_predictor[i][j][k] = rho[i][j][k]
				  - (dt/dx_grid[i-1]) * u[i][j][k] * (rho[i][j][k] - rho[i-1][j][k])
				  - (dt/dx_grid[i-1]) * rho_air * (u[i][j][k] - u[i-1][j][k])
				  - (dt/dy_grid[j-1]) * v[i][j][k] * (rho[i][j][k] - rho[i][j-1][k])
				  - (dt/dy_grid[j-1]) * rho_air * (v[i][j][k] - v[i][j-1][k])
				  - (dt/dz_grid[k-1]) * w[i][j][k] * (rho[i][j][k] - rho[i][j][k-1])
				  - (dt/dz_grid[k-1]) * rho_air * (w[i][j][k] - w[i][j][k-1])
				  + artificial_viscosity(i,j,k,rho,rho,viscosity_1,viscosity_2);

				  u_predictor[i][j][k] = u[i][j][k] 
				  - (dt/dx_grid[i-1]) * u[i][j][k] * (u[i][j][k] - u[i-1][j][k])
				  - (dt/dy_grid[j-1]) * v[i][j][k] * (u[i][j][k] - u[i][j-1][k])
				  - (dt/dz_grid[k-1]) * w[i][j][k] * (u[i][j][k] - u[i][j][k-1])
				  -(c_s*c_s/rho_air)*(dt/dx_grid[i-1]) * (rho[i][j][k] - rho[i-1][j][k])
				  +nu*dt*(second_deriv_x(u,i,j,k)+second_deriv_y(u,i,j,k)+second_deriv_z(u,i,j,k))
				  + artificial_viscosity(i,j,k,u,rho,viscosity_1,viscosity_2);

				  v_predictor[i][j][k] = v[i][j][k] 
				  - (dt/dx_grid[i-1]) * u[i][j][k] * (v[i][j][k] - v[i-1][j][k])
				  - (dt/dy_grid[j-1]) * v[i][j][k] * (v[i][j][k] - v[i][j-1][k])
				  - (dt/dz_grid[k-1]) * w[i][j][k] * (v[i][j][k] - v[i][j][k-1])
				  -(c_s*c_s/rho_air)*(dt/dy_grid[j-1]) * (rho[i][j][k] - rho[i][j-1][k])
				  +nu*dt*(second_deriv_x(v,i,j,k)+second_deriv_y(v,i,j,k)+second_deriv_z(v,i,j,k))
				  + artificial_viscosity(i,j,k,v,rho,viscosity_1,viscosity_2);

				  w_predictor[i][j][k] = w[i][j][k] 
				  - (dt/dx_grid[i-1]) * u[i][j][k] * (w[i][j][k] - w[i-1][j][k])
				  - (dt/dy_grid[j-1]) * v[i][j][k] * (w[i][j][k] - w[i][j-1][k])
				  - (dt/dz_grid[k-1]) * w[i][j][k] * (w[i][j][k] - w[i][j][k-1])
				  -(c_s*c_s/rho_air)*(dt/dz_grid[k-1]) * (rho[i][j][k] - rho[i][j][k-1])
				  +nu*dt*(second_deriv_x(w,i,j,k)+second_deriv_y(w,i,j,k)+second_deriv_z(w,i,j,k))
				  + artificial_viscosity(i,j,k,w,rho,viscosity_1,viscosity_2);
				}
			}
		}
	}
}

#ifdef BENCHMARK
current_t = omp_get_wtime();
fprintf(fp_log,"finished third mac loop (5)\tt = %g\n",current_t - start_t);
#endif

// fprintf(fp_log,"finished third mac loop (5)\tt = %g\n",current_t - start_t);
// fflush(fp_log);

{

/* next deal with points on the surface of the recorder - these have an acoustic impedance Z_rec_1 + i omega Z_rec_2 */

if(surface_normal_flag == NO) {

#ifdef CLUSTER
#pragma omp parallel for shared(u,v,w,rho,u_predictor,v_predictor,w_predictor,rho_predictor) private(i,j,k,m)
#endif

	for(m = 0; m < n_boundary; m++) {
		i = x_boundary[m];
		j = y_boundary[m];
		k = z_boundary[m];
		if(boundary_flag[m] == XUP) {
			v_predictor[i][j][k] = w_predictor[i][j][k] = 0.0;
			rho_predictor[i][j][k] = rho[i][j][k]
						-(dt/dx_grid[i])*rho_air*(u[i+1][j][k] - u[i][j][k])
						-(dt/dx_grid[i])*u[i][j][k]*(rho[i+1][j][k] - rho[i][j][k])
                                + artificial_viscosity_on_recorder(m,i,j,k,viscosity_1,viscosity_2);
			u_predictor[i][j][k] = u[i][j][k] * Z_rec_2_prime + rho_predictor[i][j][k] / Z_rec_1_2_plus;
		}
		if(boundary_flag[m] == XDOWN) {
			v_predictor[i][j][k] = w_predictor[i][j][k] = 0.0;
			rho_predictor[i][j][k] = rho[i][j][k]
						-(dt/dx_grid[i-1])*rho_air*(u[i][j][k] - u[i-1][j][k])
						-(dt/dx_grid[i-1])*u[i][j][k]*(rho[i][j][k] - rho[i-1][j][k])
                                + artificial_viscosity_on_recorder(m,i,j,k,viscosity_1,viscosity_2);
			u_predictor[i][j][k] = u[i][j][k] * Z_rec_2_prime + rho_predictor[i][j][k] / Z_rec_1_2_plus;
		}
		if(boundary_flag[m] == YUP) {
			u_predictor[i][j][k] = w_predictor[i][j][k] = 0.0;
			rho_predictor[i][j][k] = rho[i][j][k]
						-(dt/dy_grid[j])*rho_air*(v[i][j+1][k] - v[i][j][k])
						-(dt/dy_grid[j])*v[i][j][k]*(rho[i][j+1][k] - rho[i][j][k])
                                + artificial_viscosity_on_recorder(m,i,j,k,viscosity_1,viscosity_2);
			v_predictor[i][j][k] = v[i][j][k] * Z_rec_2_prime + rho_predictor[i][j][k] / Z_rec_1_2_plus;
		}
		if(boundary_flag[m] == YDOWN) {
			u_predictor[i][j][k] = w_predictor[i][j][k] = 0.0;
			rho_predictor[i][j][k] = rho[i][j][k]
						-(dt/dy_grid[j-1])*rho_air*(v[i][j][k] - v[i][j-1][k])
						-(dt/dy_grid[j-1])*v[i][j][k]*(rho[i][j][k] - rho[i][j-1][k])
                                + artificial_viscosity_on_recorder(m,i,j,k,viscosity_1,viscosity_2);
			v_predictor[i][j][k] = v[i][j][k] * Z_rec_2_prime + rho_predictor[i][j][k] / Z_rec_1_2_plus;
		}
		if(boundary_flag[m] == ZUP) {
			u_predictor[i][j][k] = v_predictor[i][j][k] = 0.0;
			rho_predictor[i][j][k] = rho[i][j][k]
						-(dt/dz_grid[k])*rho_air*(w[i][j][k+1] - w[i][j][k])
						-(dt/dz_grid[k])*w[i][j][k]*(rho[i][j][k+1] - rho[i][j][k])
                                + artificial_viscosity_on_recorder(m,i,j,k,viscosity_1,viscosity_2);
			w_predictor[i][j][k] = w[i][j][k] * Z_rec_2_prime + rho_predictor[i][j][k] / Z_rec_1_2_plus;
		}
		if(boundary_flag[m] == ZDOWN) {
			u_predictor[i][j][k] = v_predictor[i][j][k] = 0.0;
			rho_predictor[i][j][k] = rho[i][j][k]
						-(dt/dz_grid[k-1])*rho_air*(w[i][j][k] - w[i][j][k-1])
						-(dt/dz_grid[k-1])*w[i][j][k]*(rho[i][j][k] - rho[i][j][k-1])
                                + artificial_viscosity_on_recorder(m,i,j,k,viscosity_1,viscosity_2);
			w_predictor[i][j][k] = w[i][j][k] * Z_rec_2_prime + rho_predictor[i][j][k] / Z_rec_1_2_plus;
		}
	}
}
else {
/* next deal with points on the surface of the recorder using the surface normal info 
							- these have an acoustic impedance Z */
#ifdef CLUSTER
#pragma omp parallel for shared(u,v,w,rho,u_predictor,v_predictor,w_predictor,rho_predictor) private(i,j,k,m)
#endif

	for(m = 0; m < n_surface_points; m++) {
		i = i_surface[m];
		j = j_surface[m];
		k = k_surface[m];
		rho_predictor[i][j][k] = rho[i][j][k] + artificial_viscosity_on_recorder(m,i,j,k,viscosity_1,viscosity_2);
		if(x_hat[m] > 0.1) {
			rho_predictor[i][j][k] += 
				-(dt/dx_grid[i])*rho_air*(u[i+1][j][k] - u[i][j][k])
				-(dt/dx_grid[i])*u[i][j][k]*(rho[i+1][j][k] - rho[i][j][k]);
		}
		else if(x_hat[m] < -0.1) {
			rho_predictor[i][j][k] += 
				-(dt/dx_grid[i-1])*rho_air*(u[i][j][k] - u[i-1][j][k])
				-(dt/dx_grid[i-1])*u[i][j][k]*(rho[i][j][k] - rho[i-1][j][k]);
		}
// fixed a bug in the next 4 if statements -- used index i for all the grid indices
// fixed in v. 8.3
		if(y_hat[m] > 0.1) {
			rho_predictor[i][j][k] += 
				-(dt/dy_grid[j])*rho_air*(v[i][j+1][k] - v[i][j][k])
				-(dt/dy_grid[j])*v[i][j][k]*(rho[i][j+1][k] - rho[i][j][k]);
		}
		else if(y_hat[m] < -0.1) {
			rho_predictor[i][j][k] += 
				-(dt/dy_grid[j-1])*rho_air*(v[i][j][k] - v[i][j-1][k])
				-(dt/dy_grid[j-1])*v[i][j][k]*(rho[i][j][k] - rho[i][j-1][k]);
		}
		if(z_hat[m] > 0.1) {
			rho_predictor[i][j][k] += 
				-(dt/dz_grid[k])*rho_air*(w[i][j][k+1] - w[i][j][k])
				-(dt/dz_grid[k])*w[i][j][k]*(rho[i][j][k+1] - rho[i][j][k]);
		}
		else if(z_hat[m] < -0.1) {
			rho_predictor[i][j][k] += 
				-(dt/dz_grid[k-1])*rho_air*(w[i][j][k] - w[i][j][k-1])
				-(dt/dz_grid[k-1])*w[i][j][k]*(rho[i][j][k] - rho[i][j][k-1]);
		}

		vel_perp = rho_predictor[i][j][k] / Z_rec_1_2_plus;
		u_predictor[i][j][k] = u[i][j][k] * Z_rec_2_prime + vel_perp * x_hat[m]; 
		v_predictor[i][j][k] = v[i][j][k] * Z_rec_2_prime + vel_perp * y_hat[m]; 
		w_predictor[i][j][k] = w[i][j][k] * Z_rec_2_prime + vel_perp * z_hat[m]; 
	}
/*
	for(m = 0; m < n_surface_points; m++) {
		i = i_surface[m];
		j = j_surface[m];
		k = k_surface[m];
		rho_predictor[i][j][k] = rho[i][j][k]
					-(dt/dx_grid[i])*rho_air*(u[i+1][j][k] - u[i][j][k])
					-(dt/dx_grid[i])*u[i][j][k]*(rho[i+1][j][k] - rho[i][j][k])
					-(dt/dy_grid[i])*rho_air*(v[i+1][j][k] - v[i][j][k])
					-(dt/dy_grid[i])*v[i][j][k]*(rho[i+1][j][k] - rho[i][j][k])
					-(dt/dz_grid[i])*rho_air*(w[i+1][j][k] - w[i][j][k])
					-(dt/dz_grid[i])*w[i][j][k]*(rho[i+1][j][k] - rho[i][j][k])
                               + artificial_viscosity_on_recorder(m,i,j,k,viscosity_1,viscosity_2);
		vel_perp = rho_predictor[i][j][k] / Z_rec_1_2_plus;
		u_predictor[i][j][k] = u[i][j][k] * Z_rec_2_prime + vel_perp * x_hat[m]; 
		v_predictor[i][j][k] = v[i][j][k] * Z_rec_2_prime + vel_perp * y_hat[m]; 
		w_predictor[i][j][k] = w[i][j][k] * Z_rec_2_prime + vel_perp * z_hat[m]; 
	}
*/
}

// fprintf(fp_log,"starting boundary points \tt = %g\n",current_t - start_t);
// fflush(fp_log);

/* next deal with points on the boundaries - 
	these have an acoustic impedance Z = Z_1 + i omega Z_2 
   note that we don't have to worry about the corners
*/
	if(boundary_Z_flag == ON) {

#ifdef CLUSTER
#pragma omp collapse(2)
#pragma omp parallel for shared(u,v,w,rho,u_predictor,v_predictor,w_predictor,rho_predictor)  private(i,j,k) 
#endif
		for(j = 1; j <= j_max - 1; j++) {	// boundary plane at x = 0
			for(k = 1; k <= k_max - 1; k++) {
				v_predictor[0][j][k] = w_predictor[0][j][k] = 0.0;
				rho_predictor[0][j][k] = rho[0][j][k] 
							-(dt/dx_grid[0])*rho_air*(u[1][j][k] - u[0][j][k])
							-(dt/dx_grid[0])*u[0][j][k]*(rho[1][j][k] - rho[0][j][k])
               	                 + artificial_viscosity_on_boundary(0,j,k,viscosity_1,viscosity_2);
				u_predictor[0][j][k] 
					= u[0][j][k] * Z_bound_2_prime + rho_predictor[0][j][k] / Z_bound_1_2_plus;
			}
		}
// fprintf(fp_log,"x=0 \tt = %g\n",current_t - start_t);
// fflush(fp_log);
#ifdef CLUSTER
#pragma omp collapse(2)
#pragma omp parallel for shared(u,v,w,rho,u_predictor,v_predictor,w_predictor,rho_predictor)  private(i,j,k) 
#endif
		for(j = 1; j <= j_max - 1; j++) {	// boundary at x = x_max
			for(k = 1; k <= k_max - 1; k++) {
				v_predictor[i_max][j][k] = w_predictor[i_max][j][k] = 0.0;
				rho_predictor[i_max][j][k] = rho[i_max][j][k]
// error found 1-10-13					-(dt/dx_grid[i_max-1])*rho_air*(u[i_max][j][k] - u[i_max-1][k][j])
					-(dt/dx_grid[i_max-1])*rho_air*(u[i_max][j][k] - u[i_max-1][j][k])
// error found 1-5-13			-(dt/dx_grid[i_max-1])*u[i_max][k][j]*(rho[i_max][j][k]-rho[i_max-1][k][j])
// error found 1-9-13					-(dt/dx_grid[i_max-1])*u[i_max][j][k]*(rho[i_max][j][k]-rho[i_max-1][k][j])
					-(dt/dx_grid[i_max-1])*u[i_max][j][k]*(rho[i_max][j][k]-rho[i_max-1][j][k])
               	                 + artificial_viscosity_on_boundary(i_max,j,k,viscosity_1,viscosity_2);
				u_predictor[i_max][j][k] 
				= u[i_max][j][k] * Z_bound_2_prime + rho_predictor[i_max][j][k] / Z_bound_1_2_plus;
			}
		}
// fprintf(fp_log,"x=x_max \tt = %g\n",current_t - start_t);
// fflush(fp_log);
#ifdef CLUSTER
#pragma omp collapse(2)
#pragma omp parallel for shared(u,v,w,rho,u_predictor,v_predictor,w_predictor,rho_predictor)  private(i,j,k) 
#endif
		for(i = 1; i <= i_max - 1; i++) {	// y = 0
			for(k = 1; k <= k_max - 1; k++) {
				u_predictor[i][0][k] = w_predictor[i][0][k] = 0.0;
				rho_predictor[i][0][k] = rho[i][0][k]
							-(dt/dy_grid[0])*rho_air*(v[i][1][k] - v[i][0][k])
							-(dt/dy_grid[0])*v[i][0][k]*(rho[i][1][k] - rho[i][0][k])
               	                 + artificial_viscosity_on_boundary(i,0,k,viscosity_1,viscosity_2);
				v_predictor[i][0][k] 
				= v[i][0][k] * Z_bound_2_prime + rho_predictor[i][0][k] / Z_bound_1_2_plus;
			}
		}
// fprintf(fp_log,"y=0 \tt = %g\n",current_t - start_t);
// fflush(fp_log);
#ifdef CLUSTER
#pragma omp collapse(2)
#pragma omp parallel for shared(u,v,w,rho,u_predictor,v_predictor,w_predictor,rho_predictor)  private(i,j,k) 
#endif
		for(i = 1; i <= i_max - 1; i++) {	// y = y_max plane
			for(k = 1; k <= k_max - 1; k++) {
				u_predictor[i][j_max][k] = w_predictor[i][j_max][k] = 0.0;
				rho_predictor[i][j_max][k] = rho[i][j_max][k]
				 -(dt/dy_grid[j_max-1])*rho_air*(v[i][j_max][k] - v[i][j_max-1][k])
				 -(dt/dy_grid[j_max-1])*v[i][j_max][k]*(rho[i][j_max][k]-rho[i][j_max-1][k])
               	                 + artificial_viscosity_on_boundary(i,j_max,k,viscosity_1,viscosity_2);
				v_predictor[i][j_max][k] = 
				v[i][j_max][k] * Z_bound_2_prime + rho_predictor[i][j_max][k] / Z_bound_1_2_plus;
			}
		}
// fprintf(fp_log,"y_max done=0 \tt = %g\n",current_t - start_t);
// fflush(fp_log);
#ifdef CLUSTER
#pragma omp collapse(2)
#pragma omp parallel for shared(u,v,w,rho,u_predictor,v_predictor,w_predictor,rho_predictor)  private(i,j,k) 
#endif
		for(i = 1; i <= i_max - 1; i++) {	// z = 0 plane
			for(j = 1; j <= j_max - 1; j++) {
				u_predictor[i][j][0] = v_predictor[i][j][0] = 0.0;
				rho_predictor[i][j][0] = rho[i][j][0]
							-(dt/dz_grid[0])*rho_air*(w[i][j][1] - w[i][j][0])
							-(dt/dz_grid[0])*w[i][j][0]*(rho[i][j][1] - rho[i][j][0])
               	                 + artificial_viscosity_on_boundary(i,j,0,viscosity_1,viscosity_2);
				w_predictor[i][j][0] 
				= w[i][j][0] * Z_bound_2_prime + rho_predictor[i][j][0] / Z_bound_1_2_plus;
			}
		}
// fprintf(fp_log,"z=0 done \tt = %g\n",current_t - start_t);
// fflush(fp_log);
#ifdef CLUSTER
#pragma omp collapse(2)
#pragma omp parallel for shared(u,v,w,rho,u_predictor,v_predictor,w_predictor,rho_predictor)  private(i,j,k) 
#endif
		for(i = 1; i <= i_max - 1; i++) {	// z = z_max plane
			for(j = 1; j <= j_max - 1; j++) {
				u_predictor[i][j][k_max] = v_predictor[i][j][k_max] = 0.0;
				rho_predictor[i][j][k_max] = rho[i][j][k_max]
				 -(dt/dz_grid[k_max-1])*rho_air*(w[i][j][k_max] - w[i][j][k_max-1])
				 -(dt/dz_grid[k_max-1])*w[i][j][k_max]*(rho[i][j][k_max]-rho[i][j][k_max-1])
               	                 + artificial_viscosity_on_boundary(i,j,k_max,viscosity_1,viscosity_2);
				w_predictor[i][j][k_max] = 
				w[i][j][k_max] * Z_bound_2_prime + rho_predictor[i][j][k_max] / Z_bound_1_2_plus;
			}
		}
	}
}

// fprintf(fp_log,"dealing with init_region\tt = %g\n",current_t - start_t);
// fflush(fp_log);

/* now deal with rho in region where u is held constant	*/
	for(i = i_init_min; i <= i_init_max; i++) {
		for(j = j_init_min; j <= j_init_max; j++) {
			for(k = k_init_min; k <= k_init_max; k++) {
				rho_predictor[i][j][k] = rho_predictor[i-1][j][k] + (dx_grid[i-1]*rho_air/(c_s*c_s))*nu*second_deriv_y(u_predictor,i,j,k);
			}
		}
	}

{

#ifdef BENCHMARK
current_t = omp_get_wtime();
fprintf(fp_log,"starting fourth mac loop (6)\tt = %g\n",current_t - start_t);
#endif
// fprintf(fp_log,"starting fourth mac loop (6)\tt = %g\n",current_t - start_t);
// fflush(fp_log);

#ifdef CLUSTER
#pragma omp collapse(3)
#pragma omp parallel for shared(u,v,w,rho,u_predictor,v_predictor,w_predictor,rho_predictor) private(i,j,k)
#endif

	for(i = 1; i <= i_max-1; i++) {
		for(j = 1; j <= j_max-1; j++) {
			for(k = 1; k <= k_max-1; k++) {
/* for interior points, away from any boundaries			 */
				if(geometry[i][j][k] == OPEN) {

				  rho[i][j][k] = 0.5 * (rho[i][j][k] + rho_predictor[i][j][k] 
-(dt/dx_grid[i]) * rho_air * (u_predictor[i+1][j][k] - u_predictor[i][j][k])  
-(dt/dx_grid[i]) * u_predictor[i][j][k] * (rho_predictor[i+1][j][k] - rho_predictor[i][j][k])
-(dt/dy_grid[j]) * rho_air * (v_predictor[i][j+1][k] - v_predictor[i][j][k])
-(dt/dy_grid[j]) * v_predictor[i][j][k] * (rho_predictor[i][j+1][k] - rho_predictor[i][j][k])
-(dt/dz_grid[k]) * rho_air * (w_predictor[i][j][k+1] - w_predictor[i][j][k])
-(dt/dz_grid[k]) * w_predictor[i][j][k] * (rho_predictor[i][j][k+1] - rho_predictor[i][j][k])
				  + artificial_viscosity(i,j,k,rho_predictor,rho_predictor,viscosity_1,viscosity_2));

				  u[i][j][k] = 0.5 * (u[i][j][k] + u_predictor[i][j][k]
-(dt/dx_grid[i]) * u_predictor[i][j][k] * (u_predictor[i+1][j][k] - u_predictor[i][j][k]) 
-(dt/dy_grid[j]) * v_predictor[i][j][k] * (u_predictor[i][j+1][k] - u_predictor[i][j][k]) 
-(dt/dz_grid[k]) * w_predictor[i][j][k] * (u_predictor[i][j][k+1] - u_predictor[i][j][k]) 
-(c_s*c_s/rho_air)*(dt/dx_grid[i]) * (rho_predictor[i+1][j][k] - rho_predictor[i][j][k])
+nu*dt*(second_deriv_x(u_predictor,i,j,k)+second_deriv_y(u_predictor,i,j,k)+second_deriv_z(u_predictor,i,j,k))
				  + artificial_viscosity(i,j,k,u_predictor,rho_predictor,viscosity_1,viscosity_2));
	
				  v[i][j][k] = 0.5 * (v[i][j][k] + v_predictor[i][j][k]
-(dt/dx_grid[i]) * u_predictor[i][j][k] * (v_predictor[i+1][j][k] - v_predictor[i][j][k]) 
-(dt/dy_grid[j]) * v_predictor[i][j][k] * (v_predictor[i][j+1][k] - v_predictor[i][j][k]) 
-(dt/dz_grid[k]) * w_predictor[i][j][k] * (v_predictor[i][j][k+1] - v_predictor[i][j][k]) 
-(c_s*c_s/rho_air)*(dt/dy_grid[j]) * (rho_predictor[i][j+1][k] - rho_predictor[i][j][k])
+nu*dt*(second_deriv_x(v_predictor,i,j,k)+second_deriv_y(v_predictor,i,j,k)+second_deriv_z(v_predictor,i,j,k))
				  + artificial_viscosity(i,j,k,v_predictor,rho_predictor,viscosity_1,viscosity_2));
	
				  w[i][j][k] = 0.5 * (w[i][j][k] + w_predictor[i][j][k]
-(dt/dx_grid[i]) * u_predictor[i][j][k] * (w_predictor[i+1][j][k] - w_predictor[i][j][k]) 
-(dt/dy_grid[j]) * v_predictor[i][j][k] * (w_predictor[i][j+1][k] - w_predictor[i][j][k]) 
-(dt/dz_grid[k]) * w_predictor[i][j][k] * (w_predictor[i][j][k+1] - w_predictor[i][j][k]) 
-(c_s*c_s/rho_air)*(dt/dz_grid[k]) * (rho_predictor[i][j][k+1] - rho_predictor[i][j][k])
+nu*dt*(second_deriv_x(w_predictor,i,j,k)+second_deriv_y(w_predictor,i,j,k)+second_deriv_z(w_predictor,i,j,k))
				  + artificial_viscosity(i,j,k,w_predictor,rho_predictor,viscosity_1,viscosity_2));
				}
			}
		}
	}
}

{
/* finally - update points on surface of recorder */
#ifdef CLUSTER
#pragma omp parallel for shared(u,v,w,rho,u_predictor,v_predictor,w_predictor,rho_predictor) private(i,j,k,m)
#endif

        for(m = 0; m < n_boundary; m++) {
                i = x_boundary[m];
                j = y_boundary[m];
                k = z_boundary[m];
                u[i][j][k] = u_predictor[i][j][k];
                v[i][j][k] = v_predictor[i][j][k];
                w[i][j][k] = w_predictor[i][j][k];
                rho[i][j][k] = rho_predictor[i][j][k];
        }

/* update points on the boundaries - note that we don't have to worry about the corners
*/

        if(boundary_Z_flag == ON) {
#ifdef CLUSTER
#pragma omp collapse(2)
#pragma omp parallel for shared(u,v,w,rho,u_predictor,v_predictor,w_predictor,rho_predictor)  private(i,j,k) 
#endif
                for(j = 1; j <= j_max - 1; j++) {       // x = 0 plane
                	for(k = 1; k <= k_max - 1; k++) {       
                                rho[0][j][k] = rho_predictor[0][j][k];
                                u[0][j][k] = u_predictor[0][j][k];
                                v[0][j][k] = v_predictor[0][j][k];
                                w[0][j][k] = w_predictor[0][j][k];
			}
                }
#ifdef CLUSTER
#pragma omp collapse(2)
#pragma omp parallel for shared(u,v,w,rho,u_predictor,v_predictor,w_predictor,rho_predictor)  private(i,j,k) 
#endif
                for(j = 1; j <= j_max - 1; j++) {       // x = x_max plane
                	for(k = 1; k <= k_max - 1; k++) {       
                                rho[i_max][j][k] = rho_predictor[i_max][j][k];
                                u[i_max][j][k] = u_predictor[i_max][j][k];
                                v[i_max][j][k] = v_predictor[i_max][j][k];
                                w[i_max][j][k] = w_predictor[i_max][j][k];
			}
                }
#ifdef CLUSTER
#pragma omp collapse(2)
#pragma omp parallel for shared(u,v,w,rho,u_predictor,v_predictor,w_predictor,rho_predictor)  private(i,j,k) 
#endif
                for(i = 1; i <= i_max - 1; i++) {       // y = 0 plane
                	for(k = 1; k <= k_max - 1; k++) {       
                                rho[i][0][k] = rho_predictor[i][0][k];
                                v[i][0][k] = v_predictor[i][0][k];
                                u[i][0][k] = u_predictor[i][0][k];
                                w[i][0][k] = w_predictor[i][0][k];
			}
                }
#ifdef CLUSTER
#pragma omp collapse(2)
#pragma omp parallel for shared(u,v,w,rho,u_predictor,v_predictor,w_predictor,rho_predictor)  private(i,j,k) 
#endif
                for(i = 1; i <= i_max - 1; i++) {       // y = y_max plane
                	for(k = 1; k <= k_max - 1; k++) {       
                                rho[i][j_max][k] = rho_predictor[i][j_max][k];
                                v[i][j_max][k] = v_predictor[i][j_max][k];
                                u[i][j_max][k] = u_predictor[i][j_max][k];
                                w[i][j_max][k] = w_predictor[i][j_max][k];
			}
                }
#ifdef CLUSTER
#pragma omp collapse(2)
#pragma omp parallel for shared(u,v,w,rho,u_predictor,v_predictor,w_predictor,rho_predictor)  private(i,j,k) 
#endif
                for(i = 1; i <= i_max - 1; i++) {       // z = 0 plane
                	for(j = 1; j <= j_max - 1; j++) {       
                                rho[i][j][0] = rho_predictor[i][j][0];
                                w[i][j][0] = w_predictor[i][j][0];
                                u[i][j][0] = u_predictor[i][j][0];
                                v[i][j][0] = v_predictor[i][j][0];
			}
                }
#ifdef CLUSTER
#pragma omp collapse(2)
#pragma omp parallel for shared(u,v,w,rho,u_predictor,v_predictor,w_predictor,rho_predictor)  private(i,j,k) 
#endif
                for(i = 1; i <= i_max - 1; i++) {       // z = z_max plane
                	for(j = 1; j <= j_max - 1; j++) {       
                                rho[i][j][k_max] = rho_predictor[i][j][k_max];
                                w[i][j][k_max] = w_predictor[i][j][k_max];
                                u[i][j][k_max] = u_predictor[i][j][k_max];
                                v[i][j][k_max] = v_predictor[i][j][k_max];
			}
                }
        }
}

/* now deal with rho in region where u is held constant	*/
	for(i = i_init_min; i <= i_init_max; i++) {
		for(j = j_init_min; j <= j_init_max; j++) {
			for(k = k_init_min; k <= k_init_max; k++) {
				rho[i][j][k] = rho[i-1][j][k] + (dx_grid[i-1]*rho_air/(c_s*c_s))*nu*second_deriv_y(u,i,j,k); 
			}
		}
	}

// now test for nan and record problem locations in log_file - if any are found then exit
/*
 */
#ifdef CLUSTER
#pragma omp collapse(3)
#pragma omp parallel for shared(u,v,w,rho,u_predictor,v_predictor,w_predictor,rho_predictor)  private(i,j,k) 
#endif
        for(i = 1; i <= i_max-1; i++) {
                for(j = 1; j <= j_max-1; j++) {
                        for(k = 1; k <= k_max-1; k++) {
                                if(fabs(rho[i][j][k]) > 1e3) {
                                        fprintf(fp_log,"(c)rho too large %g\t%d\t%d\t%d\n",rho[i][j][k],i,j,k);
                                        nan_flag = YES;
                                }
                        }
                }
        }
        if(nan_flag == YES) {
                record_data2(n_file_num);
                exit(0);
        }
/**/

{

#ifdef BENCHMARK
current_t = omp_get_wtime();
fprintf(fp_log,"finished fourth mac loop (7)\tt = %g\n",current_t - start_t);
#endif
// fprintf(fp_log,"finished fourth mac loop (7)\tt = %g\n",current_t - start_t);
// fflush(fp_log);

	t += 2 * dt;

// looking for the problem
// save_for_restart();
// my_exit();

	if((t <= t_blow_transient) || 
			((t > t_stop_blowing) && (t <= t_stop_blowing+t_ramp_down))) {
/*
 #ifdef CLUSTER
 #pragma omp collapse(3)
 #pragma omp parallel for shared(u,u_predictor,rho) private(i,j,k)
 #endif
*/

		for(i = i_init_min; i <= i_init_max; i++) {
			for(j = j_init_min; j <= j_init_max; j++) {
				for(k = k_init_min; k <= k_init_max; k++) {
					u_predictor[i][j][k] = u[i][j][k] = blowing_velocity(t,i,j,k);
					v[i][j][k] = w[i][j][k] = 0.0;
					v_predictor[i][j][k] = w_predictor[i][j][k] = 0.0;
				}
			}
		}
	}

	for(i = i_init_min; i <= i_init_max; i++) {
		for(j = j_init_min; j <= j_init_max; j++) {
			for(k = k_init_min; k <= k_init_max; k++) {
				if(geometry[i][j][k] != INIT_POINT) {
					fprintf(fp_log,"error C initializing rho i,j,k (%d %d %d) not in init region\n",i,j,k);
					my_exit();
				}
				rho[i][j][k] = rho[i-1][j][k] + (dx_grid[i-1]*rho_air/(c_s*c_s))*nu*second_deriv_y(u,i,j,k);
			}
		}
	}
}
{

	if(t >= t_stop_blowing + t_ramp_down) {
#ifdef CLUSTER
#pragma omp collapse(3)
#pragma omp parallel for shared(u,v,rho,rho_predictor,geometry) private(i,j,k)
#endif
		for(i = i_init_min; i <= i_init_max; i++) {
			for(j = j_init_min; j <= j_init_max; j++) {
				for(k = k_init_min; k <= k_init_max; k++) {
					geometry[i][j][k] = OPEN; 
				}
			}
		}
	}
}

// new for debugging only
// if(t >= t_blow_transient) {
/*
	fprintf(fp_log,"init region at t = %g:\n",t);
        for(i = i_init_min-1; i <= i_init_max+1; i++) {
                for(j = j_init_min-1; j <= j_init_max+1; j++) {
                        for(k = k_init_min-1; k <= k_init_max+1; k++) {
				fprintf(fp_log,"%d\t%d\t%d\t%g\t%g\n",i,j,k,rho[i][j][k],u[i][j][k]);
                        }
                }
        }
	exit(0);
*/
// }
// end of debugging code

	if(accum_flag == YES) {
		accum_filtering_data();
		++n_accum_buffer_count;
	}
	for(i = 1; i <= n_record_sound; i++) {
		rho_t_sum[i] += rho[i_record[i]][j_record[i]][k_record[i]];
	}

	t_sum += t;
	k2++;
	if(k2 >= n_record_2) {
		t_sum /= k2;
		for(i = 1; i <= n_record_sound; i++) {
			fprintf(fp_rho_t[i],"%g\t%g\n",t_sum,rho_t_sum[i]/k2);
			fflush(fp_rho_t[i]);
		}
		k2 = 0;
		for(i = 1; i <= n_record_sound; i++) {
			rho_t_sum[i] = 0.0;
		}
		t_sum = 0.0;
// new for debugging only
		rho_max = x_max = y_max = z_max = 0.0;
#ifdef CLUSTER
#pragma omp collapse(3)
#pragma omp parallel for shared(u,v,w,rho,u_predictor,v_predictor,w_predictor,rho_predictor)  private(i,j,k) 
#endif
        	for(i = 1; i <= i_max-1; i++) {
               		for(j = 1; j <= j_max-1; j++) {
               	        	for(k = 1; k <= k_max-1; k++) {
               	                	if(fabs(rho[i][j][k]) > fabs(rho_max)) {
                                        	rho_max = rho[i][j][k];
                                        	x_max = x_grid[i];
                                        	y_max = y_grid[j];
                                        	z_max = z_grid[k];
               	                 	}
               	         	}
               	 	}
        	}
		fprintf(fp_log,"at t = %g\trho(max) = %g at (%g, %g, %g)\n",t,rho_max,x_max,y_max,z_max);
		fflush(fp_log);
// now test for nan and record problem locations in log_file - if any are found then exit
#ifdef CLUSTER
#pragma omp collapse(3)
#pragma omp parallel for shared(u,v,w,rho,u_predictor,v_predictor,w_predictor,rho_predictor)  private(i,j,k) 
#endif
        	for(i = 1; i <= i_max-1; i++) {
               		for(j = 1; j <= j_max-1; j++) {
                        	for(k = 1; k <= k_max-1; k++) {
                                	if(fabs(rho[i][j][k]) > 1e3) {
                                        	fprintf(fp_log,"(d)rho too large %g\t%d\t%d\t%d\n",rho[i][j][k],i,j,k);
                                        	nan_flag = YES;
                                	}
                        	}
                	}
        	}
        	if(nan_flag == YES) {
                	record_data2(n_file_num);
                	my_exit();
        	}
// want to see where things blow up
// save_for_restart();

// end of debugging section
	}

	if((t >= t_next_map) && (t < t_map_end)) {
		accum_map(rho_buf,rho);
		accum_map(u_buf,u);
		accum_map(v_buf,v);
		accum_map(w_buf,w);	// add this 08_11_12 
		k_map++;
	}

	if((t >= t_next_record) && (t < t_map_end)) {
		normalize_map(rho_buf,k_map);
		normalize_map(u_buf,k_map);
		normalize_map(v_buf,k_map);
		normalize_map(w_buf,k_map);	// add this 08_11_12 
		record_data(n_file_num);
// fixed this bug 6-6-12 v8b		k = 0;
		k_map = 0;
		t_next_record += t_record_map;
		t_next_map = t_next_record - t_map_ave;
		reset_map(rho_buf);
		reset_map(u_buf);
		reset_map(v_buf);
		reset_map(w_buf);	// add this 08_11_12 
		n_file_num++;
/* fprintf(stdout,"%g\n",t); */
	}

	if(t >= t_next_restart_save) {
		save_for_restart();
		t_next_restart_save += t_restart_save_interval;
	}
	if(t >= t_viscosity_start) {
		viscosity_1 = viscosity_1_0;
		viscosity_2 = viscosity_2_0;
	}
	
	if((accum_flag == YES) && (t > t_accum_end)) {
		t = reset_filtering_data(n_accum_buffer_count,t);
		zero_filtering_data();
		n_accum_buffer_count = 0;
		t_next_accum += t_accum_interval;
		t_accum_end = t_next_accum + t_accum;
		accum_flag = NO;
	}
	else if((accum_flag == NO) && (t > t_next_accum)) {
		accum_flag = YES;
		fprintf(fp_log,"starting to filter rho, u, and v at t = %g\n",t);
		fflush(fp_log);
	}


#ifdef BENCHMARK
current_t = omp_get_wtime();
fprintf(fp_log,"finished loop %d in run() (8)\tt = %g\n\n",n_benchmark,current_t - start_t);
++n_benchmark;
if(n_benchmark >= 10) exit(0);
#endif

   }
   for(i = 1; i <= n_record_sound; i++) {
	fclose(fp_rho_t[i]);
   }

   return;
}

// normalize_map(float q[][N_Y_MAX][N_Z_MAX],int n)

normalize_map(float ***q,int n)
{
	int i,j,k;

	for(i = 0; i <= i_max; i++) {
		for(j = 0; j <= j_max; j++) {
			for(k = 0; k <= k_max; k++) {
				q[i][j][k] /= (double)n;
			}
		}
	}
	return;	
}

/*
 *	accumulate values in the buffers used to average the maps for rho, u, and v
 */
// accum_map(float q1[][N_Y_MAX][N_Z_MAX], float q2[][N_Y_MAX][N_Z_MAX])
accum_map(float ***q1, float ***q2)
{
	int i,j,k;

	for(i = 0; i <= i_max; i++) {
		for(j = 0; j <= j_max; j++) {
			for(k = 0; k <= k_max; k++) {
				q1[i][j][k] += q2[i][j][k];
			}
		}
	}
	return;	
}

/*
 *	reset to zero the buffers used to average the maps for rho, u, and v
 */
// reset_map(float q[][N_Y_MAX][N_Z_MAX])
reset_map(float ***q)
{
	int i,j,k;

	for(i = 0; i <= i_max; i++) {
		for(j = 0; j <= j_max; j++) {
			for(k = 0; k <= k_max; k++) {
				q[i][j][k] = 0.0; 
			}
		}
	}
	return;	
}

/* 
 * record full results for rho, u, and v as a function of position
 * put results in results_directory_full_path and have files numbered
 *
 * use ONE file to hold everything - a change from the 2D calcs
 *
 * value of t for each file is in file t_record_map.dat
 */
record_data(n)
int n;
{
	FILE *fp_save;
	int i,j,k;
	char save_file[300];

	sprintf(save_file,"%s/%s.rho_u_v_w.dat.%d",scratch_directory,results_directory,n);

	fp_save = fopen(save_file,"w");
	for(i = 0; i <= i_max; i++) {
		for(j = 0; j <= j_max; j++) {
			for(k = 0; k <= k_max; k++) {
// change number of sig figs to save - change made on 10-28-12
				fprintf(fp_save,"%0.4g\t%0.4g\t%0.4g\t%0.4g\t%0.4g\t%0.4g\t%0.4g\n",
					x_grid[i],y_grid[j],z_grid[k],rho_buf[i][j][k],
					u_buf[i][j][k],v_buf[i][j][k],w_buf[i][j][k]);
			}
		}
	}

	fprintf(fp_t,"%g\n",t);
	fflush(fp_t);
   	fclose(fp_save);

	return;
}

/*
 *      similar to record_data but does not use buffering
 */
record_data2(n)
int n;
{
        FILE *fp_save;
        int i,j,k;
        char save_file[300];

        sprintf(save_file,"%s/%s.rho_u_v_w.dat.%d",scratch_directory,results_directory,n);

        fp_save = fopen(save_file,"w");
        for(i = 0; i <= i_max; i++) {
                for(j = 0; j <= j_max; j++) {
                        for(k = 0; k <= k_max; k++) {
                                fprintf(fp_save,"%g\t%g\t%g\t%g\t%g\t%g\t%g\n",
                                        x_grid[i],y_grid[j],z_grid[k],rho[i][j][k],
                                        u[i][j][k],v[i][j][k],w[i][j][k]);
                        }
                }
        }

        fprintf(fp_t,"%g\n",t);
        fflush(fp_t);
        fclose(fp_save);

}


/*
 *	need a routine to find and classify boundary points
 *
 *	have 7 cases to consider on surface of recorder - these are
 *	reflecting/non-slip surfaces and the computation of u, v, w, and rho
 *	at these points must be treated specially, different from the 
 *	standard MacCormack algorithm
 *
 *	case 1: a surface perpendicular to x with air on the +x side == XUP
 *	case 2: a surface perpendicular to x with air on the -x side == XDOWN
 *	case 3: a surface perpendicular to y with air on the +y side == YUP
 *	case 4: a surface perpendicular to y with air on the -y side == YDOWN
 *	case 5: a surface perpendicular to z with air on the +z side == ZUP
 *	case 6: a surface perpendicular to z with air on the -z side == ZDOWN
 *	case 7: a convex corner                               == CORNER
 *
 *	this classification scheme is based on surface normals, which is different
 *	from scheme used in the 2D work
 */

boundary() 
{
	int i,j,k;
	int n;

	n = 0;	/* number of special boundary points on recorder */

	for(i = 1; i <= i_max-1; i++) {
		for(j = 1; j <= j_max-1; j++) {
			for(k = 1; k <= k_max-1; k++) {
				if(geometry[i][j][k] == SOLID) { /* have found a recorder point */
/* case 1 */				if((geometry[i+1][j][k] == OPEN)
						&& (geometry[i-1][j][k] == SOLID)
						&& (geometry[i][j+1][k] == SOLID)
						&& (geometry[i][j-1][k] == SOLID)
						&& (geometry[i][j][k-1] == SOLID)
						&& (geometry[i][j][k+1] == SOLID)) {
						x_boundary[n] = i;
						y_boundary[n] = j;
						z_boundary[n] = k;
						boundary_flag[n] = XUP;
						++n;
					}
/* case 2 */				else if((geometry[i+1][j][k] == SOLID)
						&& (geometry[i-1][j][k] == OPEN)
						&& (geometry[i][j+1][k] == SOLID)
						&& (geometry[i][j-1][k] == SOLID) 
						&& (geometry[i][j][k-1] == SOLID)
						&& (geometry[i][j][k+1] == SOLID)) {
						x_boundary[n] = i;
						y_boundary[n] = j;
						z_boundary[n] = k;
						boundary_flag[n] = XDOWN;
						++n;
					}
/* case 3 */				else if((geometry[i+1][j][k] == SOLID)
						&& (geometry[i-1][j][k] == SOLID)
						&& (geometry[i][j-1][k] == SOLID)
						&& (geometry[i][j+1][k] == OPEN)
						&& (geometry[i][j][k-1] == SOLID)
						&& (geometry[i][j][k+1] == SOLID)) {
						x_boundary[n] = i;
						y_boundary[n] = j;
						z_boundary[n] = k;
						boundary_flag[n] = YUP;
						++n;
					}
/* case 4 */				else if((geometry[i+1][j][k] == SOLID)
						&& (geometry[i-1][j][k] == SOLID)
						&& (geometry[i][j-1][k] == OPEN)
						&& (geometry[i][j+1][k] == SOLID)
						&& (geometry[i][j][k-1] == SOLID)
						&& (geometry[i][j][k+1] == SOLID)) {
						x_boundary[n] = i;
						y_boundary[n] = j;
						z_boundary[n] = k;
						boundary_flag[n] = YDOWN;
						++n;
					}
/* case 5 */				else if((geometry[i+1][j][k] == SOLID)
						&& (geometry[i-1][j][k] == SOLID)
						&& (geometry[i][j-1][k] == SOLID)
						&& (geometry[i][j+1][k] == SOLID)
						&& (geometry[i][j][k-1] == SOLID)
						&& (geometry[i][j][k+1] == OPEN)) {
						x_boundary[n] = i;
						y_boundary[n] = j;
						z_boundary[n] = k;
						boundary_flag[n] = ZUP;
						++n;
					}
/* case 6 */				else if((geometry[i+1][j][k] == SOLID)
						&& (geometry[i-1][j][k] == SOLID)
						&& (geometry[i][j-1][k] == SOLID)
						&& (geometry[i][j+1][k] == SOLID)
						&& (geometry[i][j][k-1] == OPEN)
						&& (geometry[i][j][k+1] == SOLID)) {
						x_boundary[n] = i;
						y_boundary[n] = j;
						z_boundary[n] = k;
						boundary_flag[n] = ZDOWN;
						++n;
					}
/* case 7 */				else if((geometry[i+1][j][k] == SOLID)
/* there are actually */			&& (geometry[i-1][j][k] == OPEN)
/* 8 different types  */			&& (geometry[i][j+1][k] == SOLID)
/* of case 7          */			&& (geometry[i][j-1][k] == OPEN)
						&& (geometry[i][j][k+1] == OPEN)
						&& (geometry[i][j][k-1] == SOLID)) {
/* they are all treated as */			x_boundary[n] = i;
/* having OPEN geometry */			y_boundary[n] = j;
						z_boundary[n] = k;
						boundary_flag[n] = CORNER;
						++n;
					}
					else if((geometry[i+1][j][k] == OPEN)
						&& (geometry[i-1][j][k] == SOLID)
						&& (geometry[i][j+1][k] == SOLID)
						&& (geometry[i][j-1][k] == OPEN)
						&& (geometry[i][j][k+1] == OPEN)
						&& (geometry[i][j][k-1] == SOLID)) {
						x_boundary[n] = i;
						y_boundary[n] = j;
						z_boundary[n] = k;
						boundary_flag[n] = CORNER;
						++n;
					}
					else if((geometry[i+1][j][k] == OPEN)
						&& (geometry[i-1][j][k] == SOLID)
						&& (geometry[i][j+1][k] == SOLID)
						&& (geometry[i][j-1][k] == OPEN)
						&& (geometry[i][j][k+1] == SOLID)
						&& (geometry[i][j][k-1] == OPEN)) {
						x_boundary[n] = i;
						y_boundary[n] = j;
						z_boundary[n] = k;
						boundary_flag[n] = CORNER;
						++n;
					}
					else if((geometry[i+1][j][k] == SOLID)
						&& (geometry[i-1][j][k] == OPEN)
						&& (geometry[i][j+1][k] == SOLID)
						&& (geometry[i][j-1][k] == OPEN)
						&& (geometry[i][j][k+1] == SOLID)
						&& (geometry[i][j][k-1] == OPEN)) {
						x_boundary[n] = i;
						y_boundary[n] = j;
						z_boundary[n] = k;
						boundary_flag[n] = CORNER;
						++n;
					}
					else if((geometry[i+1][j][k] == SOLID)
						&& (geometry[i-1][j][k] == OPEN)
						&& (geometry[i][j+1][k] == OPEN)
						&& (geometry[i][j-1][k] == SOLID)
						&& (geometry[i][j][k+1] == OPEN)
						&& (geometry[i][j][k-1] == SOLID)) {
						x_boundary[n] = i;
						y_boundary[n] = j;
						z_boundary[n] = k;
						boundary_flag[n] = CORNER;
						++n;
					}
					else if((geometry[i+1][j][k] == OPEN)
						&& (geometry[i-1][j][k] == SOLID)
						&& (geometry[i][j+1][k] == OPEN)
						&& (geometry[i][j-1][k] == SOLID)
						&& (geometry[i][j][k+1] == OPEN)
						&& (geometry[i][j][k-1] == SOLID)) {
						x_boundary[n] = i;
						y_boundary[n] = j;
						z_boundary[n] = k;
						boundary_flag[n] = CORNER;
						++n;
					}
					else if((geometry[i+1][j][k] == OPEN)
						&& (geometry[i-1][j][k] == SOLID)
						&& (geometry[i][j+1][k] == OPEN)
						&& (geometry[i][j-1][k] == SOLID)
						&& (geometry[i][j][k+1] == SOLID)
						&& (geometry[i][j][k-1] == OPEN)) {
						x_boundary[n] = i;
						y_boundary[n] = j;
						z_boundary[n] = k;
						boundary_flag[n] = CORNER;
						++n;
					}
					else if((geometry[i+1][j][k] == SOLID)
						&& (geometry[i-1][j][k] == OPEN)
						&& (geometry[i][j+1][k] == OPEN)
						&& (geometry[i][j-1][k] == SOLID)
						&& (geometry[i][j][k+1] == SOLID)
						&& (geometry[i][j][k-1] == OPEN)) {
						x_boundary[n] = i;
						y_boundary[n] = j;
						z_boundary[n] = k;
						boundary_flag[n] = CORNER;
						++n;
					}
				}
			}
		}
	}
/*
	for(k = 0; k < n; k++) {
		if(boundary_flag[k] == CORNER) 
			geometry[x_boundary[k]][y_boundary[k]][z_boundary[k]] = OPEN; 
	}
*/

	return(n);
}
					
/*
 *	display the classification of the boundary points on the recorder
 *					
 *	1 indicates a surface parallel to x with the air above    == XUP
 *	2 indicates a surface parallel to x with the air below    == XDOWN
 *	3 indicates a surface parallel to y with the air on right == YUP
 *	4 indicates a surface parallel to y with the air on left  == YDOWN
 *	5 indicates a surface parallel to y with the air on right == ZUP
 *	6 indicates a surface parallel to y with the air on left  == ZDOWN
 *	7 indicates a convex corner                               == CORNER
 */

display_boundary()
{
	int i,j,k,m,flag;
	FILE *fp1,*fp2,*fp3,*fp4,*fp5,*fp6,*fp7;
	char mes[400];

	fp1 = fopen("b_case_1","w");
	fp2 = fopen("b_case_2","w");
	fp3 = fopen("b_case_3","w");
	fp4 = fopen("b_case_4","w");
	fp5 = fopen("b_case_5","w");
	fp6 = fopen("b_case_6","w");
	fp7 = fopen("b_case_7","w");

	for(m = 0; m < n_boundary; m++) {
		i = x_boundary[m];	
		j = y_boundary[m];	
		k = z_boundary[m];
		flag = boundary_flag[m];
		if(flag == XUP) fprintf(fp1,"%d\t%d\t%d\n",i,j,k);			
		if(flag == XDOWN) fprintf(fp2,"%d\t%d\t%d\n",i,j,k);			
		if(flag == YUP) fprintf(fp3,"%d\t%d\t%d\n",i,j,k);			
		if(flag == YDOWN) fprintf(fp4,"%d\t%d\t%d\n",i,j,k);			
		if(flag == ZUP) fprintf(fp5,"%d\t%d\t%d\n",i,j,k);			
		if(flag == ZDOWN) fprintf(fp6,"%d\t%d\t%d\n",i,j,k);			
		if(flag == CORNER) fprintf(fp7,"%d\t%d\t%d\n",i,j,k);			

//		if(flag == XUP) fprintf(fp1,"%g\t%g\n",x_grid[i],y_grid[j],z_grid[k]);			
//		if(flag == XDOWN) fprintf(fp2,"%g\t%g\n",x_grid[i],y_grid[j],z_grid[k]);			
//		if(flag == YUP) fprintf(fp3,"%g\t%g\n",x_grid[i],y_grid[j],z_grid[k]);			
//		if(flag == YDOWN) fprintf(fp4,"%g\t%g\n",x_grid[i],y_grid[j],z_grid[k]);			
//		if(flag == ZUP) fprintf(fp5,"%g\t%g\n",x_grid[k],y_grid[j],z_grid[k]);			
//		if(flag == ZDOWN) fprintf(fp6,"%g\t%g\n",x_grid[i],y_grid[j],z_grid[k]);			
//		if(flag == CORNER) fprintf(fp7,"%g\t%g\n",x_grid[i],y_grid[j],z_grid[k]);			
	}

	fclose(fp1);
	fclose(fp2);
	fclose(fp3);
	fclose(fp4);
	fclose(fp5);
	fclose(fp6);
	fclose(fp7);

/*

#ifdef CLUSTER
	sprintf(mes,"graph -P -E -h .8 -w .8 -u .3 -r -.2 -g 1 -m 0 -f b_case_1 -c '1' -m 0 -o -f b_case_2 -c '2' -m 0 -o -f b_case_3 -c '3' -m 0 -o -f b_case_4 -c '4' -m 0 -o -f b_case_5 -c '5' -m 0 | plot2ps > %s/boundary_map.eps",results_directory_full_path);
	system(mes);
//	sprintf(mes,"pstopdf %s/boundary_map.eps -o %s/boundary_map.pdf",results_directory_full_path,results_directory_full_path); 
//	system(mes);
#endif

#ifdef LAPTOP
	sprintf(mes,"graph -P -E -h .8 -w .8 -u .3 -r -.2 -g 1 -m 0 -f b_case_1 -c '1' -m 0 -o -f b_case_2 -c '2' -m 0 -o -f b_case_3 -c '3' -m 0 -o -f b_case_4 -c '4' -m 0 -o -f b_case_5 -c '5' -m 0 | plot2ps > %s/boundary_map.eps",results_directory_full_path);
	system(mes);
	sprintf(mes,"pstopdf %s/boundary_map.eps -o %s/boundary_map.pdf",results_directory_full_path,results_directory_full_path); 
	system(mes);
#endif
*/

	return;
}

/* 
 *	save data for a restart if desired
 *	
 *	put restart time in file t_restart
 *	put rho, u, v, and w data into one file "restart_maps.dat" using
 *	a format similar to the one used for saving maps above
 *	all in the results directory
 *
 *	the map data is stored in the format
 *	i	j	k	rho	u	v	w
 *	.	.	.
 *
 *	this is changed significantly from 2D version
 *	now put all map info into one file
 */
save_for_restart()
{
	int i,j,k;
	FILE *fp_map_restart;
	FILE *fp_t_restart;
	char tmp[300],tmp2[300],mes[400];
	static int first_time = YES;

	sprintf(tmp,"%s/%s.restart_maps.dat",scratch_directory,results_directory);

// before saving the latest maps, back up the previous restart info, just in case 
// the program stops in the middle of saving this stuff
// make some small changes here (10-28-12) to have better safety
	if(first_time == NO) {
		sprintf(tmp2,"%s/%s.backup_restart_maps",scratch_directory,results_directory);
		sprintf(mes,"cp %s %s\n",tmp,tmp2); 
		system(mes);
	}

	fp_map_restart = fopen(tmp,"w");

	sprintf(tmp,"%s/t_restart",results_directory_full_path);

// also save the the previous restart time
	if(first_time == NO) {
		sprintf(tmp2,"%s/backup_t_restart.dat",results_directory_full_path);
		sprintf(mes,"cp %s %s\n",tmp,tmp2); 
		system(mes);
	}

	fp_t_restart = fopen(tmp,"w");

	fprintf(fp_t_restart,"%g\n",t);
	for(i = 0; i <= i_max; i++) {
		for(j = 0; j <= j_max; j++) {
			for(k = 0; k <= k_max; k++) {
				fprintf(fp_map_restart,"%d\t%d\t%d\t%g\t%g\t%g\t%g\n",i,j,k,
					rho[i][j][k],u[i][j][k],v[i][j][k],w[i][j][k]);
			}
		}
	}

	fclose(fp_t_restart);
	fclose(fp_map_restart);
	first_time = NO;

	return;
}

/*
 * 	the velocity imposed in the channel is time dependent
 *	in this version it starts at 0 at t = 0, increases linearly
 *	up to a time t_blow_transient at which time it has the value u_init
 *
 *	it stays at u_init until t = t_stop_blowing after which it ramps down to
 *	zero in a time t_ramp_down and then stays at zero thereafter
 */
double blowing_velocity(t_current,i,j,k)
double t_current;
int i,j,k;
{
	double val;

	if(geometry[i][j][k] != INIT_POINT) {
		fprintf(fp_log,"error A initializing u and rho -- i,j,k (%d %d %d) not in init region\n",i,j,k);
		exit(0);
	}
	if(t_current <= t_blow_transient) {
		val = u_init * init_val_j[j] * init_val_k[k] * t_current / t_blow_transient;
	}
	else if((t_current > t_stop_blowing) && 
			(t_current <= t_stop_blowing+t_ramp_down)) {
		val = u_init * init_val_j[j] * init_val_k[k] * (1.0 - (t_current-t_stop_blowing)/t_ramp_down); 
	}
	else if(t_current > t_stop_blowing+t_ramp_down) {
		val = 0.0;
	}
	else {
		val = u_init * init_val_j[j] * init_val_k[k];
	}

	return(val);
}

double
// artificial_viscosity(int i,int j,int k,float q[][N_Y_MAX][N_Z_MAX],float r[][N_Y_MAX][N_Z_MAX],double v1_0,double v2_0)
artificial_viscosity(int i,int j,int k,float ***q,float ***r,double v1_0,double v2_0)
{
	double val;
	double e2x_plus,e4x_plus;
	double e2x_minus,e4x_minus;
	double e2x,e2y,e2z;
	double e2y_plus,e4y_plus;
	double e2y_minus,e4y_minus;
	double e2z_plus,e4z_plus;
	double e2z_minus,e4z_minus;
	double nux_plus,nux,nux_minus;
	double nuy_plus,nuy,nuy_minus;
	double nuz_plus,nuz,nuz_minus;
	double v1,v2;

	v1 = v1_0;
	v2 = v2_0;

	if((geometry[i+1][j][k] == OPEN) && (geometry[i-1][j][k] == OPEN) 
		&& (geometry[i][j+1][k] == OPEN) && (geometry[i][j-1][k] == OPEN)
		&& (geometry[i][j][k+1] == OPEN) && (geometry[i][j][k-1] == OPEN)) {

		nux_plus =  fabs(r[i+2][j][k]-2.0*r[i+1][j][k]+r[i][j][k])/(4.0*rho_air);
		nux =       fabs(r[i+1][j][k]-2.0*r[i][j][k]+r[i-1][j][k])/(4.0*rho_air);
		nux_minus = fabs(r[i][j][k]-2.0*r[i-1][j][k]+r[i-2][j][k])/(4.0*rho_air);
		e2x_plus =  v1 * bigger(nux_plus,nux);
		e2x_minus = v1 * bigger(nux_minus,nux);

		nuy_plus =  fabs(r[i][j+2][k]-2.0*r[i][j+1][k]+r[i][j][k])/(4.0*rho_air);
		nuy =       fabs(r[i][j+1][k]-2.0*r[i][j][k]+r[i][j-1][k])/(4.0*rho_air);
		nuy_minus = fabs(r[i][j][k]-2.0*r[i][j-1][k]+r[i][j-2][k])/(4.0*rho_air);
		e2y_plus =  v1 * bigger(nuy_plus,nuy);
		e2y_minus = v1 * bigger(nuy_minus,nuy);

		nuz_plus =  fabs(r[i][j][k+2]-2.0*r[i][j][k+1]+r[i][j][k])/(4.0*rho_air);
		nuz =       fabs(r[i][j][k+1]-2.0*r[i][j][k]+r[i][j][k-1])/(4.0*rho_air);
		nuz_minus = fabs(r[i][j][k]-2.0*r[i][j][k-1]+r[i][j][k-2])/(4.0*rho_air);
		e2z_plus =  v1 * bigger(nuz_plus,nuz);
		e2z_minus = v1 * bigger(nuz_minus,nuz);

		val = e2x_plus*(q[i+1][j][k] - q[i][j][k]) - e2x_minus*(q[i][j][k] - q[i-1][j][k])
	    	+ e2y_plus*(q[i][j+1][k] - q[i][j][k]) - e2y_minus*(q[i][j][k] - q[i][j-1][k]);
	    	+ e2z_plus*(q[i][j][k+1] - q[i][j][k]) - e2z_minus*(q[i][j][k] - q[i][j][k-1]);

		e4x_plus =  bigger(0.0,v2 - e2x_plus);
		e4x_minus = bigger(0.0,v2 - e2x_minus);
		e4y_plus =  bigger(0.0,v2 - e2y_plus);
		e4y_minus = bigger(0.0,v2 - e2y_minus);
		e4z_plus =  bigger(0.0,v2 - e2z_plus);
		e4z_minus = bigger(0.0,v2 - e2z_minus);

		val -= e4x_plus * (q[i+2][j][k]-3.0*q[i+1][j][k]+3.0*q[i][j][k]-q[i-1][j][k])
	     	- e4x_minus *(q[i+1][j][k]-3.0*q[i][j][k]+3.0*q[i-1][j][k]-q[i-2][j][k])
	     	+ e4y_plus * (q[i][j+2][k]-3.0*q[i][j+1][k]+3.0*q[i][j][k]-q[i][j-1][k])
	     	- e4y_minus *(q[i][j+1][k]-3.0*q[i][j][k]+3.0*q[i][j-1][k]-q[i][j-2][k])
	     	+ e4z_plus * (q[i][j][k+2]-3.0*q[i][j][k+1]+3.0*q[i][j][k]-q[i][j][k-1])
	     	- e4z_minus *(q[i][j][k+1]-3.0*q[i][j][k]+3.0*q[i][j][k-1]-q[i][j][k-2]);
	}
	else if((geometry[i+1][j][k] != OPEN) && (geometry[i-1][j][k] == OPEN) 
		&& (geometry[i][j+1][k] == OPEN) && (geometry[i][j-1][k] == OPEN)
		&& (geometry[i][j][k+1] == OPEN) && (geometry[i][j][k-1] == OPEN)) {

                nux =       fabs(r[i+1][j][k]-2.0*r[i][j][k]+r[i-1][j][k])/(4.0*rho_air);
                nux_minus = fabs(r[i][j][k]-2.0*r[i-1][j][k]+r[i-2][j][k])/(4.0*rho_air);
                e2x_plus =  v1 * nux;
                e2x_minus = v1 * bigger(nux_minus,nux);

                nuy_plus =  fabs(r[i][j+2][k]-2.0*r[i][j+1][k]+r[i][j][k])/(4.0*rho_air);
                nuy =       fabs(r[i][j+1][k]-2.0*r[i][j][k]+r[i][j-1][k])/(4.0*rho_air);
                nuy_minus = fabs(r[i][j][k]-2.0*r[i][j-1][k]+r[i][j-2][k])/(4.0*rho_air);
                e2y_plus =  v1 * bigger(nuy_plus,nuy);
                e2y_minus = v1 * bigger(nuy_minus,nuy);

		nuz_plus =  fabs(r[i][j][k+2]-2.0*r[i][j][k+1]+r[i][j][k])/(4.0*rho_air);
		nuz =       fabs(r[i][j][k+1]-2.0*r[i][j][k]+r[i][j][k-1])/(4.0*rho_air);
		nuz_minus = fabs(r[i][j][k]-2.0*r[i][j][k-1]+r[i][j][k-2])/(4.0*rho_air);
		e2z_plus =  v1 * bigger(nuz_plus,nuz);
		e2z_minus = v1 * bigger(nuz_minus,nuz);

                val = e2x_plus*(q[i+1][j][k] - q[i][j][k]) - e2x_minus*(q[i][j][k] - q[i-1][j][k])
                    + e2y_plus*(q[i][j+1][k] - q[i][j][k]) - e2y_minus*(q[i][j][k] - q[i][j-1][k])
                    + e2z_plus*(q[i][j][k+1] - q[i][j][k]) - e2z_minus*(q[i][j][k] - q[i][j][k-1]);

                e4x_plus =  bigger(0.0,v2 - e2x_plus);
                e4x_minus = bigger(0.0,v2 - e2x_minus);
                e4y_plus =  bigger(0.0,v2 - e2y_plus);
                e4y_minus = bigger(0.0,v2 - e2y_minus);
                e4z_plus =  bigger(0.0,v2 - e2z_plus);
                e4z_minus = bigger(0.0,v2 - e2z_minus);

// I am not sure about the leading "(" in the next line
                val -= -(e4x_plus * (q[i+1][j][k]-3.0*q[i][j][k]+3.0*q[i-1][j][k]-q[i-2][j][k])
                     - e4x_minus *(q[i][j][k]-3.0*q[i-1][j][k]+3.0*q[i-2][j][k]-q[i-3][j][k]))
                     + e4y_plus * (q[i][j+2][k]-3.0*q[i][j+1][k]+3.0*q[i][j][k]-q[i][j-1][k])
                     - e4y_minus *(q[i][j+1][k]-3.0*q[i][j][k]+3.0*q[i][j-1][k]-q[i][j-2][k])
                     + e4z_plus * (q[i][j][k+2]-3.0*q[i][j][k+1]+3.0*q[i][j][k]-q[i][j][k-1])
                     - e4z_minus *(q[i][j][k+1]-3.0*q[i][j][k]+3.0*q[i][j][k-1]-q[i][j][k-2]);
        }
	else if((geometry[i+1][j][k] == OPEN) && (geometry[i-1][j][k] != OPEN) 
		&& (geometry[i][j+1][k] == OPEN) && (geometry[i][j-1][k] == OPEN)
		&& (geometry[i][j][k+1] == OPEN) && (geometry[i][j][k-1] == OPEN)) {

		nux_plus =  fabs(r[i+2][j][k]-2.0*r[i+1][j][k]+r[i][j][k])/(4.0*rho_air);
		nux =       fabs(r[i+1][j][k]-2.0*r[i][j][k]+r[i-1][j][k])/(4.0*rho_air);
		e2x_plus =  v1 * bigger(nux_plus,nux);
		e2x_minus = v1 * nux;

		nuy_plus =  fabs(r[i][j+2][k]-2.0*r[i][j+1][k]+r[i][j][k])/(4.0*rho_air);
		nuy =       fabs(r[i][j+1][k]-2.0*r[i][j][k]+r[i][j-1][k])/(4.0*rho_air);
		nuy_minus = fabs(r[i][j][k]-2.0*r[i][j-1][k]+r[i][j-2][k])/(4.0*rho_air);
		e2y_plus =  v1 * bigger(nuy_plus,nuy);
		e2y_minus = v1 * bigger(nuy_minus,nuy);

		nuz_plus =  fabs(r[i][j][k+2]-2.0*r[i][j][k+1]+r[i][j][k])/(4.0*rho_air);
		nuz =       fabs(r[i][j][k+1]-2.0*r[i][j][k]+r[i][j][k-1])/(4.0*rho_air);
		nuz_minus = fabs(r[i][j][k]-2.0*r[i][j][k-1]+r[i][j][k-2])/(4.0*rho_air);
		e2z_plus =  v1 * bigger(nuz_plus,nuz);
		e2z_minus = v1 * bigger(nuz_minus,nuz);

		val = e2x_plus*(q[i+1][j][k] - q[i][j][k]) - e2x_minus*(q[i][j][k] - q[i-1][j][k])
		    + e2y_plus*(q[i][j+1][k] - q[i][j][k]) - e2y_minus*(q[i][j][k] - q[i][j-1][k])
		    + e2z_plus*(q[i][j][k+1] - q[i][j][k]) - e2z_minus*(q[i][j][k] - q[i][j][k-1]);

		e4x_plus =  bigger(0.0,v2 - e2x_plus);
		e4x_minus = bigger(0.0,v2 - e2x_minus);
		e4y_plus =  bigger(0.0,v2 - e2y_plus);
		e4y_minus = bigger(0.0,v2 - e2y_minus);
		e4z_plus =  bigger(0.0,v2 - e2z_plus);
		e4z_minus = bigger(0.0,v2 - e2z_minus);

// I am not sure about the leading "(" in the next line
		val -= -(e4x_plus * (q[i+3][j][k]-3.0*q[i+2][j][k]+3.0*q[i+1][j][k]-q[i][j][k])
		     - e4x_minus *(q[i+2][j][k]-3.0*q[i+1][j][k]+3.0*q[i][j][k]-q[i-1][j][k]))
		     + e4y_plus * (q[i][j+2][k]-3.0*q[i][j+1][k]+3.0*q[i][j][k]-q[i][j-1][k])
		     - e4y_minus *(q[i][j+1][k]-3.0*q[i][j][k]+3.0*q[i][j-1][k]-q[i][j-2][k])
		     + e4z_plus * (q[i][j][k+2]-3.0*q[i][j][k+1]+3.0*q[i][j][k]-q[i][j][k-1])
		     - e4z_minus *(q[i][j][k+1]-3.0*q[i][j][k]+3.0*q[i][j][k-1]-q[i][j][k-2]);
	}
	else if((geometry[i+1][j][k] == OPEN) && (geometry[i-1][j][k] == OPEN) 
		&& (geometry[i][j+1][k] != OPEN) && (geometry[i][j-1][k] == OPEN)
		&& (geometry[i][j][k+1] == OPEN) && (geometry[i][j][k-1] == OPEN)) {

		nux_plus =  fabs(r[i+2][j][k]-2.0*r[i+1][j][k]+r[i][j][k])/(4.0*rho_air);
		nux =       fabs(r[i+1][j][k]-2.0*r[i][j][k]+r[i-1][j][k])/(4.0*rho_air);
		nux_minus = fabs(r[i][j][k]-2.0*r[i-1][j][k]+r[i-2][j][k])/(4.0*rho_air);
		e2x_plus =  v1 * bigger(nux_plus,nux);
		e2x_minus = v1 * bigger(nux_minus,nux);

		nuy =       fabs(r[i][j+1][k]-2.0*r[i][j][k]+r[i][j-1][k])/(4.0*rho_air);
		nuy_minus = fabs(r[i][j][k]-2.0*r[i][j-1][k]+r[i][j-2][k])/(4.0*rho_air);
		e2y_plus =  v1 * nuy;
		e2y_minus = v1 * bigger(nuy_minus,nuy);

		nuz_plus =  fabs(r[i][j][k+2]-2.0*r[i][j][k+1]+r[i][j][k])/(4.0*rho_air);
		nuz =       fabs(r[i][j][k+1]-2.0*r[i][j][k]+r[i][j][k-1])/(4.0*rho_air);
		nuz_minus = fabs(r[i][j][k]-2.0*r[i][j][k-1]+r[i][j][k-2])/(4.0*rho_air);
		e2z_plus =  v1 * bigger(nuz_plus,nuz);
		e2z_minus = v1 * bigger(nuz_minus,nuz);

		val = e2x_plus*(q[i+1][j][k] - q[i][j][k]) - e2x_minus*(q[i][j][k] - q[i-1][j][k])
		    + e2y_plus*(q[i][j+1][k] - q[i][j][k]) - e2y_minus*(q[i][j][k] - q[i][j-1][k])
		    + e2z_plus*(q[i][j][k+1] - q[i][j][k]) - e2z_minus*(q[i][j][k] - q[i][j][k-1]);

		e4x_plus =  bigger(0.0,v2 - e2x_plus);
		e4x_minus = bigger(0.0,v2 - e2x_minus);
		e4y_plus =  bigger(0.0,v2 - e2y_plus);
		e4y_minus = bigger(0.0,v2 - e2y_minus);
		e4z_plus =  bigger(0.0,v2 - e2z_plus);
		e4z_minus = bigger(0.0,v2 - e2z_minus);

// I am not sure about the leading "(" in the third line below
		val -= e4x_plus * (q[i+2][j][k]-3.0*q[i+1][j][k]+3.0*q[i][j][k]-q[i-1][j][k])
		     - e4x_minus *(q[i+1][j][k]-3.0*q[i][j][k]+3.0*q[i-1][j][k]-q[i-2][j][k])
		     -(e4y_plus * (q[i][j+1][k]-3.0*q[i][j][k]+3.0*q[i][j-1][k]-q[i][j-2][k])
		     - e4y_minus *(q[i][j][k]-3.0*q[i][j-1][k]+3.0*q[i][j-2][k]-q[i][j-3][k]))
		     + e4z_plus * (q[i][j][k+2]-3.0*q[i][j][k+1]+3.0*q[i][j][k]-q[i][j][k-1])
		     - e4z_minus *(q[i][j][k+1]-3.0*q[i][j][k]+3.0*q[i][j][k-1]-q[i][j][k-2]);
	}
	else if((geometry[i+1][j][k] == OPEN) && (geometry[i-1][j][k] == OPEN) 
		&& (geometry[i][j+1][k] == OPEN) && (geometry[i][j-1][k] != OPEN)
		&& (geometry[i][j][k+1] == OPEN) && (geometry[i][j][k-1] == OPEN)) {

		nux_plus =  fabs(r[i+2][j][k]-2.0*r[i+1][j][k]+r[i][j][k])/(4.0*rho_air);
		nux =       fabs(r[i+1][j][k]-2.0*r[i][j][k]+r[i-1][j][k])/(4.0*rho_air);
		nux_minus = fabs(r[i][j][k]-2.0*r[i-1][j][k]+r[i-2][j][k])/(4.0*rho_air);
		e2x_plus =  v1 * bigger(nux_plus,nux);
		e2x_minus = v1 * bigger(nux_minus,nux);

		nuy_plus =  fabs(r[i][j+2][k]-2.0*r[i][j+1][k]+r[i][j][k])/(4.0*rho_air);
		nuy =       fabs(r[i][j+1][k]-2.0*r[i][j][k]+r[i][j-1][k])/(4.0*rho_air);
		e2y_plus =  v1 * bigger(nuy_plus,nuy);
		e2y_minus = v1 * nuy;

		nuz_plus =  fabs(r[i][j][k+2]-2.0*r[i][j][k+1]+r[i][j][k])/(4.0*rho_air);
		nuz =       fabs(r[i][j][k+1]-2.0*r[i][j][k]+r[i][j][k-1])/(4.0*rho_air);
		nuz_minus = fabs(r[i][j][k]-2.0*r[i][j][k-1]+r[i][j][k-2])/(4.0*rho_air);
		e2z_plus =  v1 * bigger(nuz_plus,nuz);
		e2z_minus = v1 * bigger(nuz_minus,nuz);

		val = e2x_plus*(q[i+1][j][k] - q[i][j][k]) - e2x_minus*(q[i][j][k] - q[i-1][j][k])
		    + e2y_plus*(q[i][j+1][k] - q[i][j][k]) - e2y_minus*(q[i][j][k] - q[i][j-1][k])
		    + e2z_plus*(q[i][j][k+1] - q[i][j][k]) - e2z_minus*(q[i][j][k] - q[i][j][k-1]);

		e4x_plus =  bigger(0.0,v2 - e2x_plus);
		e4x_minus = bigger(0.0,v2 - e2x_minus);
		e4y_plus =  bigger(0.0,v2 - e2y_plus);
		e4y_minus = bigger(0.0,v2 - e2y_minus);
		e4z_plus =  bigger(0.0,v2 - e2z_plus);
		e4z_minus = bigger(0.0,v2 - e2z_minus);

// I am not sure about the leading "(" in the third line below
		val -= e4x_plus * (q[i+2][j][k]-3.0*q[i+1][j][k]+3.0*q[i][j][k]-q[i-1][j][k])
		     - e4x_minus *(q[i+1][j][k]-3.0*q[i][j][k]+3.0*q[i-1][j][k]-q[i-2][j][k])
		     -(e4y_plus * (q[i][j+3][k]-3.0*q[i][j+2][k]+3.0*q[i][j+1][k]-q[i][j][k])
		     - e4y_minus *(q[i][j+2][k]-3.0*q[i][j+1][k]+3.0*q[i][j][k]-q[i][j-1][k]))
		     + e4z_plus * (q[i][j][k+2]-3.0*q[i][j][k+1]+3.0*q[i][j][k]-q[i][j][k-1])
		     - e4z_minus *(q[i][j][k+1]-3.0*q[i][j][k]+3.0*q[i][j][k-1]-q[i][j][k-2]);
	}
	else if((geometry[i+1][j][k] == OPEN) && (geometry[i-1][j][k] == OPEN) 
		&& (geometry[i][j+1][k] == OPEN) && (geometry[i][j-1][k] == OPEN)
		&& (geometry[i][j][k+1] != OPEN) && (geometry[i][j][k-1] == OPEN)) {

		nux_plus =  fabs(r[i+2][j][k]-2.0*r[i+1][j][k]+r[i][j][k])/(4.0*rho_air);
		nux =       fabs(r[i+1][j][k]-2.0*r[i][j][k]+r[i-1][j][k])/(4.0*rho_air);
		nux_minus = fabs(r[i][j][k]-2.0*r[i-1][j][k]+r[i-2][j][k])/(4.0*rho_air);
		e2x_plus =  v1 * bigger(nux_plus,nux);
		e2x_minus = v1 * bigger(nux_minus,nux);

		nuy_plus =  fabs(r[i][j+2][k]-2.0*r[i][j+1][k]+r[i][j][k])/(4.0*rho_air);
		nuy =       fabs(r[i][j+1][k]-2.0*r[i][j][k]+r[i][j-1][k])/(4.0*rho_air);
		nuy_minus = fabs(r[i][j][k]-2.0*r[i][j-1][k]+r[i][j-2][k])/(4.0*rho_air);
		e2y_plus =  v1 * bigger(nuy_plus,nuy);
		e2y_minus = v1 * bigger(nuy_minus,nuy);

		nuz =       fabs(r[i][j][k+1]-2.0*r[i][j][k]+r[i][j][k-1])/(4.0*rho_air);
		nuz_minus = fabs(r[i][j][k]-2.0*r[i][j][k-1]+r[i][j][k-2])/(4.0*rho_air);
		e2z_plus =  v1 * nuz;
		e2z_minus = v1 * bigger(nuz_minus,nuz);

		val = e2x_plus*(q[i+1][j][k] - q[i][j][k]) - e2x_minus*(q[i][j][k] - q[i-1][j][k])
		    + e2y_plus*(q[i][j+1][k] - q[i][j][k]) - e2y_minus*(q[i][j][k] - q[i][j-1][k])
		    + e2z_plus*(q[i][j][k+1] - q[i][j][k]) - e2z_minus*(q[i][j][k] - q[i][j][k-1]);

		e4x_plus =  bigger(0.0,v2 - e2x_plus);
		e4x_minus = bigger(0.0,v2 - e2x_minus);
		e4y_plus =  bigger(0.0,v2 - e2y_plus);
		e4y_minus = bigger(0.0,v2 - e2y_minus);
		e4z_plus =  bigger(0.0,v2 - e2z_plus);
		e4z_minus = bigger(0.0,v2 - e2z_minus);

// I am not sure about the leading "(" in the fifth line below
		val -= e4x_plus * (q[i+2][j][k]-3.0*q[i+1][j][k]+3.0*q[i][j][k]-q[i-1][j][k])
		     - e4x_minus *(q[i+1][j][k]-3.0*q[i][j][k]+3.0*q[i-1][j][k]-q[i-2][j][k])
		     + e4y_plus * (q[i][j+2][k]-3.0*q[i][j+1][k]+3.0*q[i][j][k]-q[i][j-1][k])
		     - e4y_minus *(q[i][j+1][k]-3.0*q[i][j][k]+3.0*q[i][j-1][k]-q[i][j-2][k])
		     -(e4z_plus * (q[i][j][k+1]-3.0*q[i][j][k]+3.0*q[i][j][k-1]-q[i][j][k-2])
		     - e4z_minus *(q[i][j][k]-3.0*q[i][j][k-1]+3.0*q[i][j][k-2]-q[i][j][k-3]));
	}
	else if((geometry[i+1][j][k] == OPEN) && (geometry[i-1][j][k] == OPEN) 
		&& (geometry[i][j+1][k] == OPEN) && (geometry[i][j-1][k] == OPEN)
		&& (geometry[i][j][k+1] == OPEN) && (geometry[i][j][k-1] != OPEN)) {

		nux_plus =  fabs(r[i+2][j][k]-2.0*r[i+1][j][k]+r[i][j][k])/(4.0*rho_air);
		nux =       fabs(r[i+1][j][k]-2.0*r[i][j][k]+r[i-1][j][k])/(4.0*rho_air);
		nux_minus = fabs(r[i][j][k]-2.0*r[i-1][j][k]+r[i-2][j][k])/(4.0*rho_air);
		e2x_plus =  v1 * bigger(nux_plus,nux);
		e2x_minus = v1 * bigger(nux_minus,nux);

		nuy_plus =  fabs(r[i][j+2][k]-2.0*r[i][j+1][k]+r[i][j][k])/(4.0*rho_air);
		nuy =       fabs(r[i][j+1][k]-2.0*r[i][j][k]+r[i][j-1][k])/(4.0*rho_air);
		nuy_minus = fabs(r[i][j][k]-2.0*r[i][j-1][k]+r[i][j-2][k])/(4.0*rho_air);
		e2y_plus =  v1 * bigger(nuy_plus,nuy);
		e2y_minus = v1 * bigger(nuy_minus,nuy);

		nuz_plus =  fabs(r[i][j][k+2]-2.0*r[i][j][k+1]+r[i][j][k])/(4.0*rho_air);
		nuz =       fabs(r[i][j][k+1]-2.0*r[i][j][k]+r[i][j][k-1])/(4.0*rho_air);
		e2z_plus =  v1 * bigger(nuz_plus,nuz);
		e2z_minus = v1 * nuz;

		val = e2x_plus*(q[i+1][j][k] - q[i][j][k]) - e2x_minus*(q[i][j][k] - q[i-1][j][k])
		    + e2y_plus*(q[i][j+1][k] - q[i][j][k]) - e2y_minus*(q[i][j][k] - q[i][j-1][k])
		    + e2z_plus*(q[i][j][k+1] - q[i][j][k]) - e2z_minus*(q[i][j][k] - q[i][j][k-1]);

		e4x_plus =  bigger(0.0,v2 - e2x_plus);
		e4x_minus = bigger(0.0,v2 - e2x_minus);
		e4y_plus =  bigger(0.0,v2 - e2y_plus);
		e4y_minus = bigger(0.0,v2 - e2y_minus);
		e4z_plus =  bigger(0.0,v2 - e2z_plus);
		e4z_minus = bigger(0.0,v2 - e2z_minus);

// I am not sure about the leading "(" in the fifth line below
		val -= e4x_plus * (q[i+2][j][k]-3.0*q[i+1][j][k]+3.0*q[i][j][k]-q[i-1][j][k])
		     - e4x_minus *(q[i+1][j][k]-3.0*q[i][j][k]+3.0*q[i-1][j][k]-q[i-2][j][k])
		     + e4y_plus * (q[i][j+2][k]-3.0*q[i][j+1][k]+3.0*q[i][j][k]-q[i][j-1][k])
		     - e4y_minus *(q[i][j+1][k]-3.0*q[i][j][k]+3.0*q[i][j-1][k]-q[i][j-2][k])
		     -(e4z_plus * (q[i][j][k+3]-3.0*q[i][j][k+2]+3.0*q[i][j][k+1]-q[i][j][k])
		     - e4z_minus *(q[i][j][k+2]-3.0*q[i][j][k+1]+3.0*q[i][j][k]-q[i][j][k-1]));
	}
	else {
		e2x = v1 * fabs(r[i+1][j][k]-2.0*r[i][j][k]+r[i-1][j][k])/(4.0*rho_air);
	
		e2y = v1 * fabs(r[i][j+1][k]-2.0*r[i][j][k]+r[i][j-1][k])/(4.0*rho_air);

		e2z = v1 * fabs(r[i][j][k+1]-2.0*r[i][j][k]+r[i][j][k-1])/(4.0*rho_air);

		val = e2x * (q[i+1][j][k]-2.0*q[i][j][k]+q[i-1][j][k])
		    + e2y * (q[i][j+1][k]-2.0*q[i][j][k]+q[i][j-1][k])
		    + e2z * (q[i][j][k+1]-2.0*q[i][j][k]+q[i][j][k-1]);
	}

	return(val);
}

double
bigger(double x1,double x2)
{
	if(x1 >= x2) {
		return(x1);
	}
	else {
		return(x2);
	}
}

/*
 *      this is only used for dealing with rho
 *      on the surface of the recorder
 *
 *      n = index of boundary[] that describes this point
 *      v1 and v2 are the viscosity coefficients for the recorder surface
 *
 *	IMPORTANT NOTE: the convention for XUP, XDOWN, etc. is different from in previous
 *		versions, so these equations will look a bit different from in recorder7c.c
 */
double
artificial_viscosity_on_recorder(int n,int i,int j,int k,double v1_0,double v2_0)
{
        double val;
        double e2x_plus,e4x_plus;
        double e2x_minus,e4x_minus;
        double e2x,e2y,e2z;
        double e2y_plus,e4y_plus;
        double e2y_minus,e4y_minus;
        double e2z_plus,e4z_plus;
        double e2z_minus,e4z_minus;
        double nux_plus,nux,nux_minus;
        double nuy_plus,nuy,nuy_minus;
        double nuz_plus,nuz,nuz_minus;
	double v1,v2;

	v1 = 0.0; // 0.03 * v1_0;
	v2 = 0.0; // 0.03 * v2_0;

        if(boundary_flag[n] == XUP) {

                nux_plus =  fabs(rho[i+2][j][k]-2.0*rho[i+1][j][k]+rho[i][j][k])/(4.0*rho_air);
                e2x_plus =  v1 * nux_plus;
                e2x_minus = e2x_plus;

                nuy_plus =  fabs(rho[i][j+2][k]-2.0*rho[i][j+1][k]+rho[i][j][k])/(4.0*rho_air);
// found a mistake on the next line in recorder7c.c !! (fixed here)
                nuy      =  fabs(rho[i][j+1][k]-2.0*rho[i][j][k]+rho[i][j-1][k])/(4.0*rho_air);
                nuy_minus =  fabs(rho[i][j][k]-2.0*rho[i][j-1][k]+rho[i][j-2][k])/(4.0*rho_air);
                e2y_plus =  v1 * bigger(nuy_plus,nuy);
                e2y_minus = v1 * bigger(nuy_minus,nuy);

                nuz_plus =  fabs(rho[i][j][k+2]-2.0*rho[i][j][k+1]+rho[i][j][k])/(4.0*rho_air);
// found a mistake on the next line in recorder7c.c !! (fixed here)
                nuz      =  fabs(rho[i][j][k+1]-2.0*rho[i][j][k]+rho[i][j][k-1])/(4.0*rho_air);
                nuz_minus =  fabs(rho[i][j][k]-2.0*rho[i][j][k-1]+rho[i][j][k-2])/(4.0*rho_air);
                e2z_plus =  v1 * bigger(nuz_plus,nuz);
                e2z_minus = v1 * bigger(nuz_minus,nuz);

                val = e2x_plus*(rho[i+2][j][k] - rho[i+1][j][k]) - e2x_minus*(rho[i+1][j][k] - rho[i][j][k])
                    + e2y_plus*(rho[i][j+1][k] - rho[i][j][k]) - e2y_minus*(rho[i][j][k] - rho[i][j-1][k])
                    + e2z_plus*(rho[i][j][k+1] - rho[i][j][k]) - e2z_minus*(rho[i][j][k] - rho[i][j][k-1]);

                e4x_plus =  bigger(0.0,v2 - e2x_plus);
                e4x_minus = bigger(0.0,v2 - e2x_minus);
                e4y_plus =  bigger(0.0,v2 - e2y_plus);
                e4y_minus = bigger(0.0,v2 - e2y_minus);
                e4z_plus =  bigger(0.0,v2 - e2z_plus);
                e4z_minus = bigger(0.0,v2 - e2z_minus);

                val -= e4x_plus * (rho[i+4][j][k]-3.0*rho[i+3][j][k]+3.0*rho[i+2][j][k]-rho[i+1][j][k])
                     - e4x_minus *(rho[i+3][j][k]-3.0*rho[i+2][j][k]+3.0*rho[i+1][j][k]-rho[i][j][k])
                     + e4y_plus * (rho[i][j+2][k]-3.0*rho[i][j+1][k]+3.0*rho[i][j][k]-rho[i][j-1][k])
                     - e4y_minus *(rho[i][j+1][k]-3.0*rho[i][j][k]+3.0*rho[i][j-1][k]-rho[i][j-2][k])
                     + e4z_plus * (rho[i][j][k+2]-3.0*rho[i][j][k+1]+3.0*rho[i][j][k]-rho[i][j][k-1])
                     - e4z_minus *(rho[i][j][k+1]-3.0*rho[i][j][k]+3.0*rho[i][j][k-1]-rho[i][j][k-2]);
        }
        else if(boundary_flag[n] == XDOWN) {

                nux_minus = fabs(rho[i][j][k]-2.0*rho[i-1][j][k]+rho[i-2][j][k])/(4.0*rho_air);
                e2x_minus = v1 * nux_minus;
                e2x_plus =  e2x_minus;

                nuy_plus =  fabs(rho[i][j+2][k]-2.0*rho[i][j+1][k]+rho[i][j][k])/(4.0*rho_air);
                nuy =       fabs(rho[i][j+1][k]-2.0*rho[i][j][k]+rho[i][j-1][k])/(4.0*rho_air);
                nuy_minus = fabs(rho[i][j][k]-2.0*rho[i][j-1][k]+rho[i][j-2][k])/(4.0*rho_air);
                e2y_plus =  v1 * bigger(nuy_plus,nuy);
                e2y_minus = v1 * bigger(nuy_minus,nuy);

                nuz_plus =  fabs(rho[i][j][k+2]-2.0*rho[i][j][k+1]+rho[i][j][k])/(4.0*rho_air);
                nuz =       fabs(rho[i][j][k+1]-2.0*rho[i][j][k]+rho[i][j][k-1])/(4.0*rho_air);
                nuz_minus = fabs(rho[i][j][k]-2.0*rho[i][j][k-1]+rho[i][j][k-2])/(4.0*rho_air);
                e2z_plus =  v1 * bigger(nuz_plus,nuz);
                e2z_minus = v1 * bigger(nuz_minus,nuz);

                val = e2x_plus*(rho[i][j][k] - rho[i-1][j][k]) - e2x_minus*(rho[i-1][j][k] - rho[i-2][j][k])
                    + e2y_plus*(rho[i][j+1][k] - rho[i][j][k]) - e2y_minus*(rho[i][j][k] - rho[i][j-1][k])
                    + e2z_plus*(rho[i][j][k+1] - rho[i][j][k]) - e2z_minus*(rho[i][j][k] - rho[i][j][k-1]);

                e4x_plus =  bigger(0.0,v2 - e2x_plus);
                e4x_minus = bigger(0.0,v2 - e2x_minus);
                e4y_plus =  bigger(0.0,v2 - e2y_plus);
                e4y_minus = bigger(0.0,v2 - e2y_minus);
                e4z_plus =  bigger(0.0,v2 - e2z_plus);
                e4z_minus = bigger(0.0,v2 - e2z_minus);

                val -= e4x_plus * (rho[i][j][k]-3.0*rho[i-1][j][k]+3.0*rho[i-2][j][k]-rho[i-3][j][k])
                     - e4x_minus *(rho[i-1][j][k]-3.0*rho[i-2][j][k]+3.0*rho[i-3][j][k]-rho[i-4][j][k])
                     + e4y_plus * (rho[i][j+2][k]-3.0*rho[i][j+1][k]+3.0*rho[i][j][k]-rho[i][j-1][k])
                     - e4y_minus *(rho[i][j+1][k]-3.0*rho[i][j][k]+3.0*rho[i][j-1][k]-rho[i][j-2][k])
                     + e4z_plus * (rho[i][j][k+2]-3.0*rho[i][j][k+1]+3.0*rho[i][j][k]-rho[i][j][k-1])
                     - e4z_minus *(rho[i][j][k+1]-3.0*rho[i][j][k]+3.0*rho[i][j][k-1]-rho[i][j][k-2]);

        }
        else if(boundary_flag[n] == YUP) {

                nux_plus =  fabs(rho[i+2][j][k]-2.0*rho[i+1][j][k]+rho[i][j][k])/(4.0*rho_air);
                nux =       fabs(rho[i+1][j][k]-2.0*rho[i][j][k]+rho[i-1][j][k])/(4.0*rho_air);
                nux_minus = fabs(rho[i][j][k]-2.0*rho[i-1][j][k]+rho[i-2][j][k])/(4.0*rho_air);
                e2x_plus =  v1 * bigger(nux_plus,nux);
                e2x_minus = v1 * bigger(nux_minus,nux);

                nuy_plus =  fabs(rho[i][j+2][k]-2.0*rho[i][j+1][k]+rho[i][j][k])/(4.0*rho_air);
                e2y_plus =  v1 * nuy_plus;
                e2y_minus = e2y_plus;

                nuz_plus =  fabs(rho[i][j][k+2]-2.0*rho[i][j][k+1]+rho[i][j][k])/(4.0*rho_air);
                nuz =       fabs(rho[i][j][k+1]-2.0*rho[i][j][k]+rho[i][j][k-1])/(4.0*rho_air);
                nuz_minus = fabs(rho[i][j][k]-2.0*rho[i][j][k-1]+rho[i][j][k-2])/(4.0*rho_air);
                e2z_plus =  v1 * bigger(nuz_plus,nuz);
                e2z_minus = v1 * bigger(nuz_minus,nuz);

                val = e2x_plus*(rho[i+1][j][k] - rho[i][j][k]) - e2x_minus*(rho[i][j][k] - rho[i-1][j][k])
                    + e2y_plus*(rho[i][j+2][k] - rho[i][j+1][k]) - e2y_minus*(rho[i][j+1][k] - rho[i][j][k])
                    + e2z_plus*(rho[i][j][k+1] - rho[i][j][k]) - e2z_minus*(rho[i][j][k] - rho[i][j][k-1]);

                e4x_plus =  bigger(0.0,v2 - e2x_plus);
                e4x_minus = bigger(0.0,v2 - e2x_minus);
                e4y_plus =  bigger(0.0,v2 - e2y_plus);
                e4y_minus = bigger(0.0,v2 - e2y_minus);
                e4z_plus =  bigger(0.0,v2 - e2z_plus);
                e4z_minus = bigger(0.0,v2 - e2z_minus);

                val -= e4x_plus * (rho[i+2][j][k]-3.0*rho[i+1][j][k]+3.0*rho[i][j][k]-rho[i-1][j][k])
                     - e4x_minus *(rho[i+1][j][k]-3.0*rho[i][j][k]+3.0*rho[i-1][j][k]-rho[i-2][j][k])
                     + e4y_plus * (rho[i][j+4][k]-3.0*rho[i][j+3][k]+3.0*rho[i][j+2][k]-rho[i][j+1][k])
                     - e4y_minus *(rho[i][j+3][k]-3.0*rho[i][j+2][k]+3.0*rho[i][j+1][k]-rho[i][j][k])
                     + e4z_plus * (rho[i][j][k+2]-3.0*rho[i][j][k+1]+3.0*rho[i][j][k]-rho[i][j][k-1])
                     - e4z_minus *(rho[i][j][k+1]-3.0*rho[i][j][k]+3.0*rho[i][j][k-1]-rho[i][j][k-2]);
        
        }
        else if(boundary_flag[n] == YDOWN) {

                nux_plus =  fabs(rho[i+2][j][k]-2.0*rho[i+1][j][k]+rho[i][j][k])/(4.0*rho_air);
                nux =       fabs(rho[i+1][j][k]-2.0*rho[i][j][k]+rho[i-1][j][k])/(4.0*rho_air);
                nux_minus = fabs(rho[i][j][k]-2.0*rho[i-1][j][k]+rho[i-2][j][k])/(4.0*rho_air);
                e2x_plus =  v1 * bigger(nux_plus,nux);
                e2x_minus = v1 * bigger(nux_minus,nux);

                nuy_minus = fabs(rho[i][j][k]-2.0*rho[i][j-1][k]+rho[i][j-2][k])/(4.0*rho_air);
                e2y_minus = v1 * nuy_minus;
                e2y_plus =  e2y_minus;

                nuz_plus =  fabs(rho[i][j][k+2]-2.0*rho[i][j][k+1]+rho[i][j][k])/(4.0*rho_air);
                nuz =       fabs(rho[i][j][k+1]-2.0*rho[i][j][k]+rho[i][j][k-1])/(4.0*rho_air);
                nuz_minus = fabs(rho[i][j][k]-2.0*rho[i][j][k-1]+rho[i][j][k-2])/(4.0*rho_air);
                e2z_plus =  v1 * bigger(nuz_plus,nuz);
                e2z_minus = v1 * bigger(nuz_minus,nuz);

                val = e2x_plus*(rho[i+1][j][k] - rho[i][j][k]) - e2x_minus*(rho[i][j][k] - rho[i-1][j][k])
                    + e2y_plus*(rho[i][j][k] - rho[i][j-1][k]) - e2y_minus*(rho[i][j-1][k] - rho[i][j-2][k])
                    + e2z_plus*(rho[i][j][k+1] - rho[i][j][k]) - e2z_minus*(rho[i][j][k] - rho[i][j][k-1]);

                e4x_plus =  bigger(0.0,v2 - e2x_plus);
                e4x_minus = bigger(0.0,v2 - e2x_minus);
                e4y_plus =  bigger(0.0,v2 - e2y_plus);
                e4y_minus = bigger(0.0,v2 - e2y_minus);
                e4z_plus =  bigger(0.0,v2 - e2z_plus);
                e4z_minus = bigger(0.0,v2 - e2z_minus);

                val -= e4x_plus * (rho[i+2][j][k]-3.0*rho[i+1][j][k]+3.0*rho[i][j][k]-rho[i-1][j][k])
                     - e4x_minus *(rho[i+1][j][k]-3.0*rho[i][j][k]+3.0*rho[i-1][j][k]-rho[i-2][j][k])
                     + e4y_plus * (rho[i][j][k]-3.0*rho[i][j-1][k]+3.0*rho[i][j-2][k]-rho[i][j-3][k])
                     - e4y_minus *(rho[i][j-1][k]-3.0*rho[i][j-2][k]+3.0*rho[i][j-3][k]-rho[i][j-4][k])
                     + e4z_plus * (rho[i][j][k+2]-3.0*rho[i][j][k+1]+3.0*rho[i][j][k]-rho[i][j][k-1])
                     - e4z_minus *(rho[i][j][k+1]-3.0*rho[i][j][k]+3.0*rho[i][j][k-1]-rho[i][j][k-2]);

        }
        else if(boundary_flag[n] == ZUP) {

                nux_plus =  fabs(rho[i+2][j][k]-2.0*rho[i+1][j][k]+rho[i][j][k])/(4.0*rho_air);
                nux =       fabs(rho[i+1][j][k]-2.0*rho[i][j][k]+rho[i-1][j][k])/(4.0*rho_air);
                nux_minus = fabs(rho[i][j][k]-2.0*rho[i-1][j][k]+rho[i-2][j][k])/(4.0*rho_air);
                e2x_plus =  v1 * bigger(nux_plus,nux);
                e2x_minus = v1 * bigger(nux_minus,nux);

                nuy_plus =  fabs(rho[i][j+2][k]-2.0*rho[i][j+1][k]+rho[i][j][k])/(4.0*rho_air);
                nuy =       fabs(rho[i][j+1][k]-2.0*rho[i][j][k]+rho[i][j-1][k])/(4.0*rho_air);
                nuy_minus = fabs(rho[i][j][k]-2.0*rho[i][j-1][k]+rho[i][j-2][k])/(4.0*rho_air);
                e2y_plus =  v1 * bigger(nuy_plus,nuy);
                e2y_minus = v1 * bigger(nuy_minus,nuy);

                nuz_plus =  fabs(rho[i][j][k+2]-2.0*rho[i][j][k+1]+rho[i][j][k])/(4.0*rho_air);
                e2z_plus =  v1 * nuz_plus;
                e2z_minus = e2z_plus;

                val = e2x_plus*(rho[i+1][j][k] - rho[i][j][k]) - e2x_minus*(rho[i][j][k] - rho[i-1][j][k])
                    + e2y_plus*(rho[i][j+1][k] - rho[i][j][k]) - e2y_minus*(rho[i][j][k] - rho[i][j-1][k])
                    + e2z_plus*(rho[i][j][k+2] - rho[i][j][k+1]) - e2z_minus*(rho[i][j][k+1] - rho[i][j][k]);

                e4x_plus =  bigger(0.0,v2 - e2x_plus);
                e4x_minus = bigger(0.0,v2 - e2x_minus);
                e4y_plus =  bigger(0.0,v2 - e2y_plus);
                e4y_minus = bigger(0.0,v2 - e2y_minus);
                e4z_plus =  bigger(0.0,v2 - e2z_plus);
                e4z_minus = bigger(0.0,v2 - e2z_minus);

                val -= e4x_plus * (rho[i+2][j][k]-3.0*rho[i+1][j][k]+3.0*rho[i][j][k]-rho[i-1][j][k])
                     - e4x_minus *(rho[i+1][j][k]-3.0*rho[i][j][k]+3.0*rho[i-1][j][k]-rho[i-2][j][k])
                     + e4y_plus * (rho[i][j+2][k]-3.0*rho[i][j+1][k]+3.0*rho[i][j][k]-rho[i][j-1][k])
                     - e4y_minus *(rho[i][j+1][k]-3.0*rho[i][j][k]+3.0*rho[i][j-1][k]-rho[i][j-2][k])
                     + e4z_plus * (rho[i][j][k+4]-3.0*rho[i][j][k+3]+3.0*rho[i][j][k+2]-rho[i][j][k+1])
                     - e4z_minus *(rho[i][j][k+3]-3.0*rho[i][j][k+2]+3.0*rho[i][j][k+1]-rho[i][j][k]);
        
	}
        else if(boundary_flag[n] == ZDOWN) {

                nux_plus =  fabs(rho[i+2][j][k]-2.0*rho[i+1][j][k]+rho[i][j][k])/(4.0*rho_air);
                nux =       fabs(rho[i+1][j][k]-2.0*rho[i][j][k]+rho[i-1][j][k])/(4.0*rho_air);
                nux_minus = fabs(rho[i][j][k]-2.0*rho[i-1][j][k]+rho[i-2][j][k])/(4.0*rho_air);
                e2x_plus =  v1 * bigger(nux_plus,nux);
                e2x_minus = v1 * bigger(nux_minus,nux);

                nuy_plus =  fabs(rho[i][j+2][k]-2.0*rho[i][j+1][k]+rho[i][j][k])/(4.0*rho_air);
                nuy =       fabs(rho[i][j+1][k]-2.0*rho[i][j][k]+rho[i][j-1][k])/(4.0*rho_air);
                nuy_minus = fabs(rho[i][j][k]-2.0*rho[i][j-1][k]+rho[i][j-2][k])/(4.0*rho_air);
                e2y_plus =  v1 * bigger(nuy_plus,nuy);
                e2y_minus = v1 * bigger(nuy_minus,nuy);

                nuz_minus = fabs(rho[i][j][k]-2.0*rho[i][j][k-1]+rho[i][j][k-2])/(4.0*rho_air);
                e2z_minus = v1 * nuz_minus;
                e2z_plus =  e2z_minus;

                val = e2x_plus*(rho[i+1][j][k] - rho[i][j][k]) - e2x_minus*(rho[i][j][k] - rho[i-1][j][k])
                    + e2y_plus*(rho[i][j+1][k] - rho[i][j][k]) - e2y_minus*(rho[i][j][k] - rho[i][j-1][k])
                    + e2z_plus*(rho[i][j][k] - rho[i][j][k-1]) - e2z_minus*(rho[i][j][k-1] - rho[i][j][k-2]);

                e4x_plus =  bigger(0.0,v2 - e2x_plus);
                e4x_minus = bigger(0.0,v2 - e2x_minus);
                e4y_plus =  bigger(0.0,v2 - e2y_plus);
                e4y_minus = bigger(0.0,v2 - e2y_minus);
                e4z_plus =  bigger(0.0,v2 - e2z_plus);
                e4z_minus = bigger(0.0,v2 - e2z_minus);

                val -= e4x_plus * (rho[i+2][j][k]-3.0*rho[i+1][j][k]+3.0*rho[i][j][k]-rho[i-1][j][k])
                     - e4x_minus *(rho[i+1][j][k]-3.0*rho[i][j][k]+3.0*rho[i-1][j][k]-rho[i-2][j][k])
                     + e4y_plus * (rho[i][j+2][k]-3.0*rho[i][j+1][k]+3.0*rho[i][j][k]-rho[i][j-1][k])
                     - e4y_minus *(rho[i][j+1][k]-3.0*rho[i][j][k]+3.0*rho[i][j-1][k]-rho[i][j-2][k])
                     + e4z_plus * (rho[i][j][k]-3.0*rho[i][j][k-1]+3.0*rho[i][j][k-2]-rho[i][j][k-3])
                     - e4z_minus *(rho[i][j][k-1]-3.0*rho[i][j][k-2]+3.0*rho[i][j][k-3]-rho[i][j][k-4]);

	}
        else {
                val = 0.0;
        }

        return(val);
}
/*
 *      this is only used for dealing with 
 *      on the boundaries of the simulation region
 *
 *      v1 and v2 are the viscosity coefficients
 *
 *      note that the four corners are skipped - no viscosity there
 */
double
artificial_viscosity_on_boundary(int i,int j,int k,double v1_0,double v2_0)
{
        double val;
        double e2x_plus,e4x_plus;
        double e2x_minus,e4x_minus;
        double e2x,e2y,e2z;
        double e2y_plus,e4y_plus;
        double e2y_minus,e4y_minus;
        double e2z_plus,e4z_plus;
        double e2z_minus,e4z_minus;
        double nux_plus,nux,nux_minus;
        double nuy_plus,nuy,nuy_minus;
        double nuz_plus,nuz,nuz_minus;
	double v1,v2;

//	return(0.0);	// no AV on boundaries

	v1 = 1.0 * v1_0;
	v2 = 1.0 * v2_0;

// first the surface in the y = 0 plane 
// found a mistake in this line (and similar ones that follow) in recorder7c.c -- but since
//	i_max > j_max it probably did not matter!!
        if((j == 0) && ((i > 1) && (i < i_max-1)) && ((k > 1) && (k < k_max-1))) {

                nux_plus =  fabs(rho[i+2][j][k]-2.0*rho[i+1][j][k]+rho[i][j][k])/(4.0*rho_air);
                nux      =  fabs(rho[i+1][j][k]-2.0*rho[i][j][k]+rho[i-1][j][k])/(4.0*rho_air);
                nux_minus =  fabs(rho[i][j][k]-2.0*rho[i-1][j][k]+rho[i-2][j][k])/(4.0*rho_air);
                e2x_plus =  v1 * bigger(nux_plus,nux);
                e2x_minus = v1 * bigger(nux_minus,nux);

                nuy_plus =  fabs(rho[i][j+2][k]-2.0*rho[i][j+1][k]+rho[i][j][k])/(4.0*rho_air);
                e2y_plus =  v1 * nuy_plus;
                e2y_minus = e2y_plus;

                nuz_plus =  fabs(rho[i][j][k+2]-2.0*rho[i][j][k+1]+rho[i][j][k])/(4.0*rho_air);
                nuz      =  fabs(rho[i][j][k+1]-2.0*rho[i][j][k]+rho[i][j][k-1])/(4.0*rho_air);
                nuz_minus =  fabs(rho[i][j][k]-2.0*rho[i][j][k-1]+rho[i][j][k-2])/(4.0*rho_air);
                e2z_plus =  v1 * bigger(nuz_plus,nuz);
                e2z_minus = v1 * bigger(nuz_minus,nuz);

                val = e2x_plus*(rho[i+1][j][k] - rho[i][j][k]) - e2x_minus*(rho[i][j][k] - rho[i-1][j][k])
                    + e2y_plus*(rho[i][j+2][k] - rho[i][j+1][k]) - e2y_minus*(rho[i][j+1][k] - rho[i][j][k])
                    + e2z_plus*(rho[i][j][k+1] - rho[i][j][k]) - e2z_minus*(rho[i][j][k] - rho[i][j][k-1]);

                e4x_plus =  bigger(0.0,v2 - e2x_plus);
                e4x_minus = bigger(0.0,v2 - e2x_minus);
                e4y_plus =  bigger(0.0,v2 - e2y_plus);
                e4y_minus = bigger(0.0,v2 - e2y_minus);
                e4z_plus =  bigger(0.0,v2 - e2z_plus);
                e4z_minus = bigger(0.0,v2 - e2z_minus);

                val -= e4x_plus * (rho[i+2][j][k]-3.0*rho[i+1][j][k]+3.0*rho[i][j][k]-rho[i-1][j][k])
                     - e4x_minus *(rho[i+1][j][k]-3.0*rho[i][j][k]+3.0*rho[i-1][j][k]-rho[i-2][j][k])
                     + e4y_plus * (rho[i][j+4][k]-3.0*rho[i][j+3][k]+3.0*rho[i][j+2][k]-rho[i][j+1][k])
                     - e4y_minus *(rho[i][j+3][k]-3.0*rho[i][j+2][k]+3.0*rho[i][j+1][k]-rho[i][j][k])
                     + e4z_plus * (rho[i][j][k+2]-3.0*rho[i][j][k+1]+3.0*rho[i][j][k]-rho[i][j][k-1])
                     - e4z_minus *(rho[i][j][k+1]-3.0*rho[i][j][k]+3.0*rho[i][j][k-1]-rho[i][j][k-2]);
// fix the above lines in v. 4 here and below
        }
// next the surface in the y = y_max plane
        else if((j == j_max) && ((i > 1) && (i < i_max-1)) && ((k > 1) && (k < k_max-1))) {

                nux_plus =  fabs(rho[i+2][j][k]-2.0*rho[i+1][j][k]+rho[i][j][k])/(4.0*rho_air);
                nux      =  fabs(rho[i+1][j][k]-2.0*rho[i][j][k]+rho[i-1][j][k])/(4.0*rho_air);
                nux_minus =  fabs(rho[i][j][k]-2.0*rho[i-1][j][k]+rho[i-2][j][k])/(4.0*rho_air);
                e2x_plus =  v1 * bigger(nux_plus,nux);
                e2x_minus = v1 * bigger(nux_minus,nux);

                nuy_minus = fabs(rho[i][j][k]-2.0*rho[i][j-1][k]+rho[i][j-2][k])/(4.0*rho_air);
                e2y_minus = v1 * nuy_minus;
                e2y_plus =  e2y_minus;

                nuz_plus =  fabs(rho[i][j][k+2]-2.0*rho[i][j][k+1]+rho[i][j][k])/(4.0*rho_air);
                nuz      =  fabs(rho[i][j][k+1]-2.0*rho[i][j][k]+rho[i][j][k-1])/(4.0*rho_air);
                nuz_minus =  fabs(rho[i][j][k]-2.0*rho[i][j][k-1]+rho[i][j][k-2])/(4.0*rho_air);
                e2z_plus =  v1 * bigger(nuz_plus,nuz);
                e2z_minus = v1 * bigger(nuz_minus,nuz);

                val = e2x_plus*(rho[i+1][j][k] - rho[i][j][k]) - e2x_minus*(rho[i][j][k] - rho[i-1][j][k])
                    + e2y_plus*(rho[i][j][k] - rho[i][j-1][k]) - e2y_minus*(rho[i][j-1][k] - rho[i][j-2][k])
                    + e2z_plus*(rho[i][j][k+1] - rho[i][j][k]) - e2z_minus*(rho[i][j][k] - rho[i][j][k-1]);

                e4x_plus =  bigger(0.0,v2 - e2x_plus);
                e4x_minus = bigger(0.0,v2 - e2x_minus);
                e4y_plus =  bigger(0.0,v2 - e2y_plus);
                e4y_minus = bigger(0.0,v2 - e2y_minus);
                e4z_plus =  bigger(0.0,v2 - e2z_plus);
                e4z_minus = bigger(0.0,v2 - e2z_minus);

                val -= e4x_plus * (rho[i+2][j][k]-3.0*rho[i+1][j][k]+3.0*rho[i][j][k]-rho[i-1][j][k])
                     - e4x_minus *(rho[i+1][j][k]-3.0*rho[i][j][k]+3.0*rho[i-1][j][k]-rho[i-2][j][k])
                     + e4y_plus * (rho[i][j][k]-3.0*rho[i][j-1][k]+3.0*rho[i][j-2][k]-rho[i][j-3][k])
                     - e4y_minus *(rho[i][j-1][k]-3.0*rho[i][j-2][k]+3.0*rho[i][j-3][k]-rho[i][j-4][k])
                     + e4z_plus * (rho[i][j][k+2]-3.0*rho[i][j][k+1]+3.0*rho[i][j][k]-rho[i][j][k-1])
                     - e4z_minus *(rho[i][j][k+1]-3.0*rho[i][j][k]+3.0*rho[i][j][k-1]-rho[i][j][k-2]);
        
        }
// next the surface in the x = 0 plane
        else if((i == 0) && ((j > 1) && (j < j_max-1)) && ((k > 1) && (k < k_max-1))) {

                nux_plus =  fabs(rho[i+2][j][k]-2.0*rho[i+1][j][k]+rho[i][j][k])/(4.0*rho_air);
                e2x_plus =  v1 * nux_plus;
// another error found here
                e2x_minus = e2x_plus;

                nuy_plus =  fabs(rho[i][j+2][k]-2.0*rho[i][j+1][k]+rho[i][j][k])/(4.0*rho_air);
                nuy =       fabs(rho[i][j+1][k]-2.0*rho[i][j][k]+rho[i][j-1][k])/(4.0*rho_air);
                nuy_minus = fabs(rho[i][j][k]-2.0*rho[i][j-1][k]+rho[i][j-2][k])/(4.0*rho_air);
                e2y_plus =  v1 * bigger(nuy_plus,nuy);
                e2y_minus = v1 * bigger(nuy_minus,nuy);

                nuz_plus =  fabs(rho[i][j][k+2]-2.0*rho[i][j][k+1]+rho[i][j][k])/(4.0*rho_air);
                nuz      =  fabs(rho[i][j][k+1]-2.0*rho[i][j][k]+rho[i][j][k-1])/(4.0*rho_air);
                nuz_minus =  fabs(rho[i][j][k]-2.0*rho[i][j][k-1]+rho[i][j][k-2])/(4.0*rho_air);
                e2z_plus =  v1 * bigger(nuz_plus,nuz);
                e2z_minus = v1 * bigger(nuz_minus,nuz);

                val = e2x_plus*(rho[i+2][j][k] - rho[i+1][j][k]) - e2x_minus*(rho[i+1][j][k] - rho[i][j][k])
                    + e2y_plus*(rho[i][j+1][k] - rho[i][j][k]) - e2y_minus*(rho[i][j][k] - rho[i][j-1][k])
                    + e2z_plus*(rho[i][j][k+1] - rho[i][j][k]) - e2z_minus*(rho[i][j][k] - rho[i][j][k-1]);
// found this mistake in v8m2!  -- fix it in other sections of this program too
//                    + e2z_plus*(rho[i][j][k+1] - rho[i][j][k]) - e2z_minus*(rho[i][j][k] - rho[i-1][j][k-1]);

                e4x_plus =  bigger(0.0,v2 - e2x_plus);
                e4x_minus = bigger(0.0,v2 - e2x_minus);
                e4y_plus =  bigger(0.0,v2 - e2y_plus);
                e4y_minus = bigger(0.0,v2 - e2y_minus);
                e4z_plus =  bigger(0.0,v2 - e2z_plus);
                e4z_minus = bigger(0.0,v2 - e2z_minus);

                val -= e4x_plus * (rho[i+4][j][k]-3.0*rho[i+3][j][k]+3.0*rho[i+2][j][k]-rho[i+1][j][k])
                     - e4x_minus *(rho[i+3][j][k]-3.0*rho[i+2][j][k]+3.0*rho[i+1][j][k]-rho[i][j][k])
                     + e4y_plus * (rho[i][j+2][k]-3.0*rho[i][j+1][k]+3.0*rho[i][j][k]-rho[i][j-1][k])
                     - e4y_minus *(rho[i][j+1][k]-3.0*rho[i][j][k]+3.0*rho[i][j-1][k]-rho[i][j-2][k])
                     + e4z_plus * (rho[i][j][k+2]-3.0*rho[i][j][k+1]+3.0*rho[i][j][k]-rho[i][j][k-1])
                     - e4z_minus *(rho[i][j][k+1]-3.0*rho[i][j][k]+3.0*rho[i][j][k-1]-rho[i][j][k-2]);
        }
// next the surface in the x = x_max plane
        else if((i == i_max) && ((j > 1) && (j < j_max-1)) && ((k > 1) && (k < k_max-1))) {

                nux_minus = fabs(rho[i][j][k]-2.0*rho[i-1][j][k]+rho[i-2][j][k])/(4.0*rho_air);
                e2x_minus = v1 * nux_minus;
                e2x_plus =  e2x_minus;

                nuy_plus =  fabs(rho[i][j+2][k]-2.0*rho[i][j+1][k]+rho[i][j][k])/(4.0*rho_air);
                nuy =       fabs(rho[i][j+1][k]-2.0*rho[i][j][k]+rho[i][j-1][k])/(4.0*rho_air);
                nuy_minus = fabs(rho[i][j][k]-2.0*rho[i][j-1][k]+rho[i][j-2][k])/(4.0*rho_air);
                e2y_plus =  v1 * bigger(nuy_plus,nuy);
                e2y_minus = v1 * bigger(nuy_minus,nuy);

                nuz_plus =  fabs(rho[i][j][k+2]-2.0*rho[i][j][k+1]+rho[i][j][k])/(4.0*rho_air);
                nuz      =  fabs(rho[i][j][k+1]-2.0*rho[i][j][k]+rho[i][j][k-1])/(4.0*rho_air);
                nuz_minus =  fabs(rho[i][j][k]-2.0*rho[i][j][k-1]+rho[i][j][k-2])/(4.0*rho_air);
                e2z_plus =  v1 * bigger(nuz_plus,nuz);
                e2z_minus = v1 * bigger(nuz_minus,nuz);

                val = e2x_plus*(rho[i][j][k] - rho[i-1][j][k]) - e2x_minus*(rho[i-1][j][k] - rho[i-2][j][k])
                    + e2y_plus*(rho[i][j+1][k] - rho[i][j][k]) - e2y_minus*(rho[i][j][k] - rho[i][j-1][k])
                    + e2z_plus*(rho[i][j][k+1] - rho[i][j][k]) - e2z_minus*(rho[i][j][k] - rho[i][j][k-1]);

                e4x_plus =  bigger(0.0,v2 - e2x_plus);
                e4x_minus = bigger(0.0,v2 - e2x_minus);
                e4y_plus =  bigger(0.0,v2 - e2y_plus);
                e4y_minus = bigger(0.0,v2 - e2y_minus);
                e4z_plus =  bigger(0.0,v2 - e2z_plus);
                e4z_minus = bigger(0.0,v2 - e2z_minus);

                val -= e4x_plus * (rho[i][j][k]-3.0*rho[i-1][j][k]+3.0*rho[i-2][j][k]-rho[i-3][j][k])
                     - e4x_minus *(rho[i-1][j][k]-3.0*rho[i-2][j][k]+3.0*rho[i-3][j][k]-rho[i-4][j][k])
                     + e4y_plus * (rho[i][j+2][k]-3.0*rho[i][j+1][k]+3.0*rho[i][j][k]-rho[i][j-1][k])
                     - e4y_minus *(rho[i][j+1][k]-3.0*rho[i][j][k]+3.0*rho[i][j-1][k]-rho[i][j-2][k])
                     + e4z_plus * (rho[i][j][k+2]-3.0*rho[i][j][k+1]+3.0*rho[i][j][k]-rho[i][j][k-1])
                     - e4z_minus *(rho[i][j][k+1]-3.0*rho[i][j][k]+3.0*rho[i][j][k-1]-rho[i][j][k-2]);

        }
// next the surface in the z = 0 plane
        else if((k == 0) && ((i > 1) && (i < i_max-1)) && ((j > 1) && (j < j_max-1))) {

                nux_plus =  fabs(rho[i+2][j][k]-2.0*rho[i+1][j][k]+rho[i][j][k])/(4.0*rho_air);
                nux      =  fabs(rho[i+1][j][k]-2.0*rho[i][j][k]+rho[i-1][j][k])/(4.0*rho_air);
                nux_minus =  fabs(rho[i][j][k]-2.0*rho[i-1][j][k]+rho[i-2][j][k])/(4.0*rho_air);
                e2x_plus =  v1 * bigger(nux_plus,nux);
                e2x_minus = v1 * bigger(nux_minus,nux);

                nuy_plus =  fabs(rho[i][j+2][k]-2.0*rho[i][j+1][k]+rho[i][j][k])/(4.0*rho_air);
                nuy =       fabs(rho[i][j+1][k]-2.0*rho[i][j][k]+rho[i][j-1][k])/(4.0*rho_air);
                nuy_minus = fabs(rho[i][j][k]-2.0*rho[i][j-1][k]+rho[i][j-2][k])/(4.0*rho_air);
                e2y_plus =  v1 * bigger(nuy_plus,nuy);
                e2y_minus = v1 * bigger(nuy_minus,nuy);

                nuz_plus =  fabs(rho[i][j][k+2]-2.0*rho[i][j][k+1]+rho[i][j][k])/(4.0*rho_air);
                e2z_plus =  v1 * nuz_plus;
                e2z_minus = e2z_plus;

                val = e2x_plus*(rho[i+1][j][k] - rho[i][j][k]) - e2x_minus*(rho[i][j][k] - rho[i-1][j][k])
                    + e2y_plus*(rho[i][j+1][k] - rho[i][j][k]) - e2y_minus*(rho[i][j][k] - rho[i][j-1][k])
                    + e2z_plus*(rho[i][j][k+2] - rho[i][j][k+1]) - e2z_minus*(rho[i][j][k+1] - rho[i][j][k]);

                e4x_plus =  bigger(0.0,v2 - e2x_plus);
                e4x_minus = bigger(0.0,v2 - e2x_minus);
                e4y_plus =  bigger(0.0,v2 - e2y_plus);
                e4y_minus = bigger(0.0,v2 - e2y_minus);
                e4z_plus =  bigger(0.0,v2 - e2z_plus);
                e4z_minus = bigger(0.0,v2 - e2z_minus);

                val -= e4x_plus * (rho[i+2][j][k]-3.0*rho[i+1][j][k]+3.0*rho[i][j][k]-rho[i-1][j][k])
                     - e4x_minus *(rho[i+1][j][k]-3.0*rho[i][j][k]+3.0*rho[i-1][j][k]-rho[i-2][j][k])
                     + e4y_plus * (rho[i][j+2][k]-3.0*rho[i][j+1][k]+3.0*rho[i][j][k]-rho[i][j-1][k])
                     - e4y_minus *(rho[i][j+1][k]-3.0*rho[i][j][k]+3.0*rho[i][j-1][k]-rho[i][j-2][k])
                     + e4z_plus * (rho[i][j][k+4]-3.0*rho[i][j][k+3]+3.0*rho[i][j][k+2]-rho[i][j][k+1])
                     - e4z_minus *(rho[i][j][k+3]-3.0*rho[i][j][k+2]+3.0*rho[i][j][k+1]-rho[i][j][k]);
        }
// next the surface in the z = z_max plane
        else if((k == k_max) && ((i > 1) && (i < i_max-1)) && ((j > 1) && (j < j_max-1))) {

                nux_plus =  fabs(rho[i+2][j][k]-2.0*rho[i+1][j][k]+rho[i][j][k])/(4.0*rho_air);
                nux      =  fabs(rho[i+1][j][k]-2.0*rho[i][j][k]+rho[i-1][j][k])/(4.0*rho_air);
                nux_minus =  fabs(rho[i][j][k]-2.0*rho[i-1][j][k]+rho[i-2][j][k])/(4.0*rho_air);
                e2x_plus =  v1 * bigger(nux_plus,nux);
                e2x_minus = v1 * bigger(nux_minus,nux);

                nuy_plus =  fabs(rho[i][j+2][k]-2.0*rho[i][j+1][k]+rho[i][j][k])/(4.0*rho_air);
                nuy =       fabs(rho[i][j+1][k]-2.0*rho[i][j][k]+rho[i][j-1][k])/(4.0*rho_air);
                nuy_minus = fabs(rho[i][j][k]-2.0*rho[i][j-1][k]+rho[i][j-2][k])/(4.0*rho_air);
                e2y_plus =  v1 * bigger(nuy_plus,nuy);
                e2y_minus = v1 * bigger(nuy_minus,nuy);

                nuz_minus = fabs(rho[i][j][k]-2.0*rho[i][j][k-1]+rho[i][j][k-2])/(4.0*rho_air);
                e2z_minus = v1 * nuz_minus;
                e2z_plus =  e2z_minus;

                val = e2x_plus*(rho[i+1][j][k] - rho[i][j][k]) - e2x_minus*(rho[i][j][k] - rho[i-1][j][k])
                    + e2y_plus*(rho[i][j+1][k] - rho[i][j][k]) - e2y_minus*(rho[i][j][k] - rho[i][j-1][k])
                    + e2z_plus*(rho[i][j][k] - rho[i][j][k-1]) - e2z_minus*(rho[i][j][k-1] - rho[i][j][k-2]);

                e4x_plus =  bigger(0.0,v2 - e2x_plus);
                e4x_minus = bigger(0.0,v2 - e2x_minus);
                e4y_plus =  bigger(0.0,v2 - e2y_plus);
                e4y_minus = bigger(0.0,v2 - e2y_minus);
                e4z_plus =  bigger(0.0,v2 - e2z_plus);
                e4z_minus = bigger(0.0,v2 - e2z_minus);

                val -= e4x_plus * (rho[i+2][j][k]-3.0*rho[i+1][j][k]+3.0*rho[i][j][k]-rho[i-1][j][k])
                     - e4x_minus *(rho[i+1][j][k]-3.0*rho[i][j][k]+3.0*rho[i-1][j][k]-rho[i-2][j][k])
                     + e4y_plus * (rho[i][j+2][k]-3.0*rho[i][j+1][k]+3.0*rho[i][j][k]-rho[i][j-1][k])
                     - e4y_minus *(rho[i][j+1][k]-3.0*rho[i][j][k]+3.0*rho[i][j-1][k]-rho[i][j-2][k])
                     + e4z_plus * (rho[i][j][k]-3.0*rho[i][j][k-1]+3.0*rho[i][j][k-2]-rho[i][j][k-3])
                     - e4z_minus *(rho[i][j][k-1]-3.0*rho[i][j][k-2]+3.0*rho[i][j][k-3]-rho[i][j][k-4]);

        }
        else {
                val = 0.0;
        }

        return(val);
}

/*
 *	reset (i.e., set to zero) the arrays used to accumulate averaging data for rho, u, and v
 */
zero_filtering_data()
{
	int i,j,k;

{

#ifdef CLUSTER
#pragma omp collapse(3)
#pragma omp parallel for shared(u_ave,v_ave,w_ave,rho_ave)  private(i,j,k) 
#endif

	for(i = 0; i <= i_max; i++) {
		for(j = 0; j <= j_max; j++) {
			for(k = 0; k <= k_max; k++) {
				rho_ave[i][j][k] = u_ave[i][j][k] = v_ave[i][j][k] = w_ave[i][j][k] = 0.0;
			}
		}
	}

}

	return;
}

/*
 *	accumulate data for rho, u, and v as part of filtering process
 */
accum_filtering_data()
{
	int i,j,k;

{

#ifdef CLUSTER
#pragma omp collapse(3)
#pragma omp parallel for shared(u,v,w,rho,u_ave,v_ave,w_ave,rho_ave)  private(i,j,k) 
#endif

	for(i = 0; i <= i_max; i++) {
		for(j = 0; j <= j_max; j++) {
			for(k = 0; k <= k_max; k++) {
				rho_ave[i][j][k] += rho[i][j][k];
				u_ave[i][j][k] += u[i][j][k];
				v_ave[i][j][k] += v[i][j][k];
				w_ave[i][j][k] += w[i][j][k];
			}
		}
	}

}

	return;
}

/*
 *	replace current data for rho, u, and v with data accumulated by the filtering
 */
double
reset_filtering_data(int n,double t_a)
{
	int i,j,k;
	double new_t;

	new_t = t_a - t_accum/2.0;
	fprintf(fp_log,"finish filtering rho, u, and v at t = %g\n\treset t to %g\n",t_a,new_t);
	fflush(fp_log);
{

#ifdef CLUSTER
#pragma omp collapse(3)
#pragma omp parallel for shared(n,u,v,rho,w,w_ave,u_ave,v_ave,rho_ave)  private(i,j,k) 
#endif

	for(i = 0; i <= i_max; i++) {
		for(j = 0; j <= j_max; j++) {
			for(k = 0; k <= k_max; k++) {
				rho[i][j][k] = rho_ave[i][j][k] / ((double)n); 
				u[i][j][k] = u_ave[i][j][k] / ((double)n);
				v[i][j][k] = v_ave[i][j][k] / ((double)n);
				w[i][j][k] = w_ave[i][j][k] / ((double)n);
			}
		}
	}

}

	return(new_t);
}

/*
 *	second deriv of q with respect to x at grid point i,j,k
 */
double
// second_deriv_x(float q[][N_Y_MAX][N_Z_MAX],int i, int j, int k)
second_deriv_x(float ***q,int i, int j, int k)
{
	double val;

	val = (2.0/(dx_grid[i]+dx_grid[i-1]))*
		(((q[i+1][j][k]-q[i][j][k])/(dx_grid[i])) - ((q[i][j][k]-q[i-1][j][k])/(dx_grid[i-1])));

	return(val);
}

/*
 *	second deriv of q with respect to y at grid point i,j,k
 */
double
second_deriv_y(float ***q,int i, int j, int k)
{
	double val;

	val = (2.0/(dy_grid[j]+dy_grid[j-1]))*
		(((q[i][j+1][k]-q[i][j][k])/(dy_grid[j])) - ((q[i][j][k]-q[i][j-1][k])/(dy_grid[j-1])));

	return(val);
}

/*
 *	second deriv of q with respect to z at grid point i,j,k
 */
double
second_deriv_z(float ***q,int i, int j, int k)
{
	double val;

	val = (2.0/(dz_grid[k]+dz_grid[k-1]))*
		(((q[i][j][k+1]-q[i][j][k])/(dz_grid[k])) - ((q[i][j][k]-q[i][j][k-1])/(dz_grid[k-1])));

	return(val);
}

/*
 *	general routine to dynamically creat/allocate
 *	a three dimensional array of floats of size [nx][ny][nz]
 *
 *	the user passes to the subroutine a pointer of type (float ***) and 
 *	three integers which are the desired dimensions of the array
 *
 *	the routine allocates memory for the array which can then be 
 *	passed to other routines as simply a and accessed as a[i][j][k]
 *
 *	at the end of the program the routine cleanup_array() should be called
 *	to free the allocated memory
 */

float ***allocate_array_of_floats(int nx, int ny, int nz)
{
	float ***a;
	int i,j;

	a = (float ***)malloc(nx * sizeof(float **));
	if(a == NULL) {
		fprintf(stderr,"first malloc failed");
		exit(0);
	}
	a[0] = (float **)malloc(nx * ny * sizeof(float *));
	if(a[0] == NULL) {
		fprintf(stderr,"second (medium size) malloc failed");
		exit(0);
	}
	a[0][0] = (float *)malloc(nx * ny * nz * sizeof(float));
	if(a[0][0] == NULL) {
		fprintf(stderr,"third (big) malloc failed");
		exit(0);
	}
	for(j = 1; j < ny; j++) {
		a[0][j] = a[0][j-1] + nz;
	}
	for(i = 1; i < nx; i++) {
		a[i] = a[i-1] + ny;
		a[i][0] = a[i-1][ny - 1] + nz;
		for(j = 1; j < ny; j++) {
			a[i][j] = a[i][j-1] + nz;
		}
	}

	return(a);
}

cleanup_array(float ***a)
{
	free(a[0][0]);
	free(a[0]);
	free(a);

	return;
}

/* 
 *	routine to allocate a 3d array of integers
 */

int ***allocate_array_of_integers(int nx, int ny, int nz)
{
	int ***a;
	int i,j;

	a = (int ***)malloc(nx * sizeof(int **));
	if(a == NULL) {
		fprintf(stderr,"first malloc failed");
		exit(0);
	}
	a[0] = (int **)malloc(nx * ny * sizeof(int *));
	if(a[0] == NULL) {
		fprintf(stderr,"second (medium size) malloc failed");
		exit(0);
	}
	a[0][0] = (int *)malloc(nx * ny * nz * sizeof(int));
	if(a[0][0] == NULL) {
		fprintf(stderr,"third (big) malloc failed");
		exit(0);
	}
	for(j = 1; j < ny; j++) {
		a[0][j] = a[0][j-1] + nz;
	}
	for(i = 1; i < nx; i++) {
		a[i] = a[i-1] + ny;
		a[i][0] = a[i-1][ny - 1] + nz;
		for(j = 1; j < ny; j++) {
			a[i][j] = a[i][j-1] + nz;
		}
	}

	return(a);
}
/*
 *	allocate space for the many arrays of floats that are needed
 *	make them all of size [i_max+2][j_max+2][k_max+2] just to be sure
 */

create_arrays()
{

	rho = allocate_array_of_floats(i_max+2,j_max+2,k_max+2);
	u = allocate_array_of_floats(i_max+2,j_max+2,k_max+2);
	v = allocate_array_of_floats(i_max+2,j_max+2,k_max+2);
	w = allocate_array_of_floats(i_max+2,j_max+2,k_max+2);

	rho_predictor = allocate_array_of_floats(i_max+2,j_max+2,k_max+2);
	u_predictor = allocate_array_of_floats(i_max+2,j_max+2,k_max+2);
	v_predictor = allocate_array_of_floats(i_max+2,j_max+2,k_max+2);
	w_predictor = allocate_array_of_floats(i_max+2,j_max+2,k_max+2);

	rho_ave = allocate_array_of_floats(i_max+2,j_max+2,k_max+2);
	u_ave = allocate_array_of_floats(i_max+2,j_max+2,k_max+2);
	v_ave = allocate_array_of_floats(i_max+2,j_max+2,k_max+2);
	w_ave = allocate_array_of_floats(i_max+2,j_max+2,k_max+2);

	rho_buf = allocate_array_of_floats(i_max+2,j_max+2,k_max+2);
	u_buf = allocate_array_of_floats(i_max+2,j_max+2,k_max+2);
	v_buf = allocate_array_of_floats(i_max+2,j_max+2,k_max+2);
	w_buf = allocate_array_of_floats(i_max+2,j_max+2,k_max+2);

	return;
}

/* 
 *	free up allocated memory
 */

cleanup_memory()
{

	free(geometry[0][0]);
	free(geometry[0]);
	free(geometry);

	cleanup_array(rho); 
	cleanup_array(u); 
	cleanup_array(v);
	cleanup_array(w); 

	cleanup_array(rho_predictor); 
	cleanup_array(u_predictor); 
	cleanup_array(v_predictor);
	cleanup_array(w_predictor); 

	cleanup_array(rho_ave); 
	cleanup_array(u_ave); 
	cleanup_array(v_ave); 
	cleanup_array(w_ave);

	cleanup_array(rho_buf); 
	cleanup_array(u_buf); 
	cleanup_array(v_buf);
	cleanup_array(w_buf); 

	return;
}

my_exit()
{
	fflush(fp_log);
	exit(0);
}


//	like save_for_restart but save the predictors instead
//
save_predictor()
{
	int i,j,k;
	FILE *fp_map_restart;
	FILE *fp_t_restart;
	char tmp[300],tmp2[300],mes[400];
	static int first_time = YES;

	sprintf(tmp,"%s/%s.restart_maps.dat",scratch_directory,results_directory);

// before saving the latest maps, back up the previous restart info, just in case 
// the program stops in the middle of saving this stuff
// make some small changes here (10-28-12) to have better safety
	if(first_time == NO) {
		sprintf(tmp2,"%s/%s.backup_restart_maps",scratch_directory,results_directory);
		sprintf(mes,"cp %s %s\n",tmp,tmp2); 
		system(mes);
	}

	fp_map_restart = fopen(tmp,"w");

	sprintf(tmp,"%s/t_restart",results_directory_full_path);

// also save the the previous restart time
	if(first_time == NO) {
		sprintf(tmp2,"%s/backup_t_restart.dat",results_directory_full_path);
		sprintf(mes,"cp %s %s\n",tmp,tmp2); 
		system(mes);
	}

	fp_t_restart = fopen(tmp,"w");

	fprintf(fp_t_restart,"%g\n",t);
	for(i = 0; i <= i_max; i++) {
		for(j = 0; j <= j_max; j++) {
			for(k = 0; k <= k_max; k++) {
				fprintf(fp_map_restart,"%d\t%d\t%d\t%g\t%g\t%g\t%g\n",i,j,k,
					rho_predictor[i][j][k],u_predictor[i][j][k],v_predictor[i][j][k],w_predictor[i][j][k]);
			}
		}
	}

	fclose(fp_t_restart);
	fclose(fp_map_restart);
	first_time = NO;

	return;
}

